Ejemplo n.º 1
0
def unpack(tfrecord_dir, output_dir, resolution=None):
    print('Loading dataset "%s"' % tfrecord_dir)
    tflib.init_tf()
    dset = dataset.TFRecordDataset(tfrecord_dir, max_label_size='full', repeat=False, shuffle=False)
    tflib.init_uninitialized_vars()

    print('Extracting images to "%s"' % output_dir)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    idx = 0
    labels = []
    while True:
        if idx % 10 == 0:
            print('%d\r' % idx, end='', flush=True)
        images, lbls = dset.get_minibatch_np(1)
        if images is None:
            break
        if images.shape[1] == 1:
            img = PIL.Image.fromarray(images[0][0], 'L')
        else:
            img = PIL.Image.fromarray(images[0].transpose(1, 2, 0), 'RGB')
        if resolution is not None:
            img = img.resize((resolution, resolution), PIL.Image.ANTIALIAS)
        assert lbls.shape[0] == 1
        labels.append(lbls[0])
        png_fname = make_png_path(output_dir, idx)
        os.makedirs(os.path.dirname(png_fname), exist_ok=True)
        img.save(png_fname)
        idx += 1
    np.savez(f'{output_dir}/pack_extras.npz', labels=np.array(labels, dtype=np.uint8), num_images=idx)
    print('Extracted %d images.' % idx)
Ejemplo n.º 2
0
def compare(tfrecord_dir_a, tfrecord_dir_b, ignore_labels):
    max_label_size = 0 if ignore_labels else 'full'
    print('Loading dataset "%s"' % tfrecord_dir_a)
    tflib.init_tf()
    dset_a = dataset.TFRecordDataset(tfrecord_dir_a, max_label_size=max_label_size, repeat=False, shuffle=False)
    print('Loading dataset "%s"' % tfrecord_dir_b)
    dset_b = dataset.TFRecordDataset(tfrecord_dir_b, max_label_size=max_label_size, repeat=False, shuffle=False)
    tflib.init_uninitialized_vars()

    print('Comparing datasets')
    idx = 0
    identical_images = 0
    identical_labels = 0
    while True:
        if idx % 100 == 0:
            print('%d\r' % idx, end='', flush=True)
        images_a, labels_a = dset_a.get_minibatch_np(1)
        images_b, labels_b = dset_b.get_minibatch_np(1)
        if images_a is None or images_b is None:
            if images_a is not None or images_b is not None:
                print('Datasets contain different number of images')
            break
        if images_a.shape == images_b.shape and np.all(images_a == images_b):
            identical_images += 1
        else:
            print('Image %d is different' % idx)
        if labels_a.shape == labels_b.shape and np.all(labels_a == labels_b):
            identical_labels += 1
        else:
            print('Label %d is different' % idx)
        idx += 1
    print('Identical images: %d / %d' % (identical_images, idx))
    if not ignore_labels:
        print('Identical labels: %d / %d' % (identical_labels, idx))
Ejemplo n.º 3
0
def display(tfrecord_dir):
    print('Loading dataset "%s"' % tfrecord_dir)
    tflib.init_tf({'gpu_options.allow_growth': True})
    dset = dataset.TFRecordDataset(tfrecord_dir,
                                   max_label_size='full',
                                   repeat=False,
                                   shuffle_mb=0)
    tflib.init_uninitialized_vars()
    import cv2  # pip install opencv-python

    idx = 0
    while True:
        try:
            images, labels = dset.get_minibatch_np(1)
        except tf.errors.OutOfRangeError:
            break
        if idx == 0:
            print('Displaying images')
            cv2.namedWindow('dataset_tool')
            print('Press SPACE or ENTER to advance, ESC to exit')
        print('\nidx = %-8d\nlabel = %s' % (idx, labels[0].tolist()))
        cv2.imshow('dataset_tool', images[0].transpose(
            1, 2, 0)[:, :, ::-1])  # CHW => HWC, RGB => BGR
        idx += 1
        if cv2.waitKey() == 27:
            break
    print('\nDisplayed %d images.' % idx)
Ejemplo n.º 4
0
def extract(tfrecord_dir, output_dir):
    print('Loading dataset "%s"' % tfrecord_dir)
    tflib.init_tf({'gpu_options.allow_growth': True})
    dset = dataset.TFRecordDataset(tfrecord_dir,
                                   max_label_size=0,
                                   repeat=False,
                                   shuffle_mb=0)
    tflib.init_uninitialized_vars()

    print('Extracting images to "%s"' % output_dir)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    idx = 0
    while True:
        if idx % 10 == 0:
            print('%d\r' % idx, end='', flush=True)
        try:
            images, _labels = dset.get_minibatch_np(1)
        except tf.errors.OutOfRangeError:
            break
        if images.shape[1] == 1:
            img = PIL.Image.fromarray(images[0][0], 'L')
        else:
            img = PIL.Image.fromarray(images[0].transpose(1, 2, 0), 'RGB')
        img.save(os.path.join(output_dir, 'img%08d.png' % idx))
        idx += 1
    print('Extracted %d images.' % idx)
Ejemplo n.º 5
0
def info(tfrecord_dir):
    print()
    print('%-20s%s' % ('Dataset name:', os.path.basename(tfrecord_dir)))

    bytes_total = 0
    bytes_max = 0
    num_files = 0
    for f in sorted(glob.glob(os.path.join(tfrecord_dir, '*'))):
        if os.path.isfile(f):
            fs = os.stat(f).st_size
            bytes_total += fs
            bytes_max = max(bytes_max, fs)
            num_files += 1
    print('%-20s%.2f' % ('Total size GB:', bytes_total / (1 << 30)))
    print('%-20s%.2f' % ('Largest file GB:', bytes_max / (1 << 30)))
    print('%-20s%d' % ('Num files:', num_files))

    tflib.init_tf()
    dset = dataset.TFRecordDataset(tfrecord_dir,
                                   max_label_size='full',
                                   repeat=False,
                                   shuffle=False)
    tflib.init_uninitialized_vars()

    print('%-20s%d' % ('Image width:', dset.shape[2]))
    print('%-20s%d' % ('Image height:', dset.shape[1]))
    print('%-20s%d' % ('Image channels:', dset.shape[0]))
    print('%-20s%s' % ('Image datatype:', dset.dtype))
    print('%-20s%d' % ('Label size:', dset.label_size))
    print('%-20s%s' % ('Label datatype:', dset.label_dtype))

    num_images = 0
    label_min = np.finfo(np.float64).max
    label_max = np.finfo(np.float64).min
    label_norm = 0
    lod = max(dset.resolution_log2 - 2, 0)
    while True:
        print('\r%-20s%d' % ('Num images:', num_images), end='', flush=True)
        _images, labels = dset.get_minibatch_np(10000, lod=lod)  # not accurate
        if labels is None:
            break
        num_images += labels.shape[0]
        if dset.label_size:
            label_min = min(label_min, np.min(labels))
            label_max = max(label_max, np.max(labels))
            label_norm += np.sum(np.sqrt(np.sum(np.square(labels), axis=1)))

    print('\r%-20s%d' % ('Num images:', num_images))
    print(
        '%-20s%s' %
        ('Label range:', '%g -- %g' %
         (label_min, label_max) if num_images and dset.label_size else 'n/a'))
    print('%-20s%s' % ('Label L2 norm:', '%g' % (label_norm / num_images)
                       if num_images and dset.label_size else 'n/a'))
    print()
Ejemplo n.º 6
0
def display(tfrecord_dir):
    print('Loading dataset "%s"' % tfrecord_dir)
    tflib.init_tf({'gpu_options.allow_growth': True})
    # label_file = './datasets/car_labels_new_rotation_oversample_classifier/car_labels_new_rotation_oversample_classifier-rxx.labels'
    dset = dataset.TFRecordDataset(tfrecord_dir, max_label_size='full', repeat=False, shuffle_mb=0)
    tflib.init_uninitialized_vars()
    import cv2  # pip install opencv-python

    idx = 0
    while True:
        try:
            images, labels = dset.get_minibatch_np(1)
        except tf.errors.OutOfRangeError:
            break
        if idx == 0:
            print('Displaying images')
            cv2.namedWindow('dataset_tool')
            print('Press SPACE or ENTER to advance, ESC to exit')
        label_list = labels[0].tolist()
        '''
        rotation_cos = labels[:, 108]
        rotation_sin = labels[:, 109]
        angle = np.arctan2(rotation_sin, rotation_cos)
        print('Angle:', (angle * 360 / (2 * np.pi) + 360 + 0) % 360)
        print('X:    ', rotation_cos)
        print('Y:    ', rotation_sin)
        new_rotation_sin = rotation_sin * -1
        angle = np.arctan2(new_rotation_sin, rotation_cos)
        print('Mirror Angle:', (angle * 360 / (2 * np.pi) + 360 + 0) % 360)
        print('Mirror X:    ', rotation_cos)
        print('Mirror Y:    ', new_rotation_sin)

        print(labels[:, 108:108+8])
        '''

        # rotations = labels[:, 108:108 + 8]
        # rotations_index = (np.argmax(rotations) + np.random.choice([-1, 1])) % 8
        # new_rotation = np.zeros([8])
        # new_rotation[rotations_index] = 1

        print('\nidx = %-8d\nlabel = %s' % (idx, label_list))
        print('background:', labels[:, -6:])
        print('roation:', labels[:, 108:108 + 8])
        # print('old rotation', label_list[108:108 + 8])
        # print('new rotation', new_rotation)
        cv2.imshow('dataset_tool', images[0].transpose(1, 2, 0)[:, :, ::-1])  # CHW => HWC, RGB => BGR

        idx += 1
        if cv2.waitKey() == 27:
            break
    print('\nDisplayed %d images.' % idx)
Ejemplo n.º 7
0
def convert_to_hdf5(hdf5_filename, tfrecord_dir, compress):
    print('Loading dataset "%s"' % tfrecord_dir)
    tflib.init_tf()
    dset = dataset.TFRecordDataset(tfrecord_dir, max_label_size='full', repeat=False, shuffle=False)
    tflib.init_uninitialized_vars()
    with HDF5Exporter(hdf5_filename, resolution=dset.shape[1], channels=dset.shape[0], compress=compress) as h5:
        all_labels = []
        while True:
            images, labels = dset.get_minibatch_np(1)
            if images is None:
                break
            h5.add_images(images)
            all_labels.append(labels)
        all_labels = np.concatenate(all_labels)
        if all_labels.size:
            h5.add_labels(all_labels)
Ejemplo n.º 8
0
def compare(tfrecord_dir_a, tfrecord_dir_b, ignore_labels):
    max_label_size = 0 if ignore_labels else "full"
    print('Loading dataset "%s"' % tfrecord_dir_a)
    tflib.init_tf({"gpu_options.allow_growth": True})
    dset_a = dataset.TFRecordDataset(tfrecord_dir_a,
                                     max_label_size=max_label_size,
                                     repeat=False,
                                     shuffle_mb=0)
    print('Loading dataset "%s"' % tfrecord_dir_b)
    dset_b = dataset.TFRecordDataset(tfrecord_dir_b,
                                     max_label_size=max_label_size,
                                     repeat=False,
                                     shuffle_mb=0)
    tflib.init_uninitialized_vars()

    print("Comparing datasets")
    idx = 0
    identical_images = 0
    identical_labels = 0
    while True:
        if idx % 100 == 0:
            print("%d\r" % idx, end="", flush=True)
        try:
            images_a, labels_a = dset_a.get_minibatch_np(1)
        except tf.errors.OutOfRangeError:
            images_a, labels_a = None, None
        try:
            images_b, labels_b = dset_b.get_minibatch_np(1)
        except tf.errors.OutOfRangeError:
            images_b, labels_b = None, None
        if images_a is None or images_b is None:
            if images_a is not None or images_b is not None:
                print("Datasets contain different number of images")
            break
        if images_a.shape == images_b.shape and np.all(images_a == images_b):
            identical_images += 1
        else:
            print("Image %d is different" % idx)
        if labels_a.shape == labels_b.shape and np.all(labels_a == labels_b):
            identical_labels += 1
        else:
            print("Label %d is different" % idx)
        idx += 1
    print("Identical images: %d / %d" % (identical_images, idx))
    if not ignore_labels:
        print("Identical labels: %d / %d" % (identical_labels, idx))
Ejemplo n.º 9
0
def embed(batch_size, resolution, img, network, iteration, seed=6600):
    tf.reset_default_graph()
    print('Loading networks from "%s"...' % network)
    tflib.init_tf()
    _, _, G = pretrained_networks.load_networks(network)
    img_in = tf.constant(img)
    opt = tf.train.AdamOptimizer(learning_rate=0.01, beta1=0.9, beta2=0.999, epsilon=1e-8)
    noise_vars = [var for name, var in G.components.synthesis.vars.items() if name.startswith('noise')]

    G_kwargs = dnnlib.EasyDict()
    G_kwargs.randomize_noise = False
    G_syn = G.components.synthesis
    loss_list = []
    p_loss_list = []
    m_loss_list = []
    dl_list = []
    si_list = []

    rnd = np.random.RandomState(seed)
    dlatent_avg = [var for name, var in G.vars.items() if name.startswith('dlatent_avg')][0].eval()
    dlatent_avg = np.expand_dims(np.expand_dims(dlatent_avg, 0), 1)
    dlatent_avg = dlatent_avg.repeat(12, 1)
    dlatent = tf.get_variable('dlatent', dtype=tf.float32, initializer=tf.constant(dlatent_avg),
                              trainable=True)
    synth_img = G_syn.get_output_for(dlatent, is_training=False, **G_kwargs)
    # synth_img = (synth_img + 1.0) / 2.0

    with tf.variable_scope('mse_loss'):
        mse_loss = tf.reduce_mean(tf.square(img_in - synth_img))
    with tf.variable_scope('perceptual_loss'):
        vgg_in = tf.concat([img_in, synth_img], 0)
        tf.keras.backend.set_image_data_format('channels_first')
        vgg = tf.keras.applications.VGG16(include_top=False, input_tensor=vgg_in, input_shape=(3, 128, 128),
                                          weights='/gdata2/fengrl/metrics/vgg.h5',
                                          pooling=None)
        h1 = vgg.get_layer('block1_conv1').output
        h2 = vgg.get_layer('block1_conv2').output
        h3 = vgg.get_layer('block3_conv2').output
        h4 = vgg.get_layer('block4_conv2').output
        pcep_loss = tf.reduce_mean(tf.square(h1[0] - h1[1])) + tf.reduce_mean(tf.square(h2[0] - h2[1])) + \
                    tf.reduce_mean(tf.square(h3[0] - h3[1])) + tf.reduce_mean(tf.square(h4[0] - h4[1]))
    loss = 0.5 * mse_loss + 0.5 * pcep_loss
    with tf.control_dependencies([loss]):
        train_op = opt.minimize(loss, var_list=[dlatent])

    tflib.init_uninitialized_vars()
    # rnd = np.random.RandomState(seed)
    tflib.set_vars({var: rnd.randn(*var.shape.as_list()) for var in noise_vars})  # [height, width]
    for i in range(iteration):
        loss_, p_loss_, m_loss_, dl_, si_, _ = tflib.run([loss, pcep_loss, mse_loss, dlatent, synth_img, train_op])
        loss_list.append(loss_)
        p_loss_list.append(p_loss_)
        m_loss_list.append(m_loss_)
        dl_loss_ = np.sum(np.square(dl_-dlatent_avg))
        dl_list.append(dl_loss_)
        if i % 500 == 0:
            si_list.append(si_)
        if i % 100 == 0:
            print('Loss %f, mse %f, ppl %f, dl %f, step %d' % (loss_, m_loss_, p_loss_,
                                                               dl_loss_, i))
    return loss_list, p_loss_list, m_loss_list, dl_list, si_list
Ejemplo n.º 10
0
def training_loop_vc(
    G_args={},  # Options for generator network.
    D_args={},  # Options for discriminator network.
    I_args={},  # Options for infogan-head/vcgan-head network.
    I_info_args={},  # Options for infogan-head/vcgan-head network.
    G_opt_args={},  # Options for generator optimizer.
    D_opt_args={},  # Options for discriminator optimizer.
    G_loss_args={},  # Options for generator loss.
    D_loss_args={},  # Options for discriminator loss.
    dataset_args={},  # Options for dataset.load_dataset().
    sched_args={},  # Options for train.TrainingSchedule.
    grid_args={},  # Options for train.setup_snapshot_image_grid().
    metric_arg_list=[],  # Options for MetricGroup.
    tf_config={},  # Options for tflib.init_tf().
    use_info_gan=False,  # Whether to use info-gan.
    use_vc_head=False,  # Whether to use vc-head.
    use_vc_head_with_cls=False,  # Whether to use classification in discriminator.
    data_dir=None,  # Directory to load datasets from.
    G_smoothing_kimg=10.0,  # Half-life of the running average of generator weights.
    minibatch_repeats=4,  # Number of minibatches to run before adjusting training parameters.
    lazy_regularization=True,  # Perform regularization as a separate training step?
    G_reg_interval=4,  # How often the perform regularization for G? Ignored if lazy_regularization=False.
    D_reg_interval=16,  # How often the perform regularization for D? Ignored if lazy_regularization=False.
    reset_opt_for_new_lod=True,  # Reset optimizer internal state (e.g. Adam moments) when new layers are introduced?
    total_kimg=25000,  # Total length of the training, measured in thousands of real images.
    mirror_augment=False,  # Enable mirror augment?
    drange_net=[
        -1, 1
    ],  # Dynamic range used when feeding image data to the networks.
    image_snapshot_ticks=50,  # How often to save image snapshots? None = only save 'reals.png' and 'fakes-init.png'.
    network_snapshot_ticks=50,  # How often to save network snapshots? None = only save 'networks-final.pkl'.
    save_tf_graph=False,  # Include full TensorFlow computation graph in the tfevents file?
    save_weight_histograms=False,  # Include weight histograms in the tfevents file?
    resume_pkl=None,  # Network pickle to resume training from, None = train from scratch.
    resume_kimg=0.0,  # Assumed training progress at the beginning. Affects reporting and training schedule.
    resume_time=0.0,  # Assumed wallclock time at the beginning. Affects reporting.
    resume_with_new_nets=False,  # Construct new networks according to G_args and D_args before resuming training?
    traversal_grid=False,  # Used for disentangled representation learning.
    n_discrete=3,  # Number of discrete latents in model.
    n_continuous=4,  # Number of continuous latents in model.
    n_samples_per=10):  # Number of samples for each line in traversal.

    # Initialize dnnlib and TensorFlow.
    tflib.init_tf(tf_config)
    num_gpus = dnnlib.submit_config.num_gpus

    # Load training set.
    training_set = dataset.load_dataset(data_dir=dnnlib.convert_path(data_dir),
                                        verbose=True,
                                        **dataset_args)
    grid_size, grid_reals, grid_labels = misc.setup_snapshot_image_grid(
        training_set, **grid_args)
    misc.save_image_grid(grid_reals,
                         dnnlib.make_run_dir_path('reals.png'),
                         drange=training_set.dynamic_range,
                         grid_size=grid_size)

    # Construct or load networks.
    with tf.device('/gpu:0'):
        if resume_pkl is None or resume_with_new_nets:
            print('Constructing networks...')
            G = tflib.Network('G',
                              num_channels=training_set.shape[0],
                              resolution=training_set.shape[1],
                              label_size=training_set.label_size,
                              **G_args)
            D = tflib.Network('D',
                              num_channels=training_set.shape[0],
                              resolution=training_set.shape[1],
                              label_size=training_set.label_size,
                              **D_args)
            if use_info_gan or use_vc_head or use_vc_head_with_cls:
                I = tflib.Network('I',
                                  num_channels=training_set.shape[0],
                                  resolution=training_set.shape[1],
                                  label_size=training_set.label_size,
                                  **I_args)
                if use_vc_head_with_cls:
                    I_info = tflib.Network('I_info',
                                           num_channels=training_set.shape[0],
                                           resolution=training_set.shape[1],
                                           label_size=training_set.label_size,
                                           **I_info_args)

            Gs = G.clone('Gs')
        if resume_pkl is not None:
            print('Loading networks from "%s"...' % resume_pkl)
            if use_info_gan or use_vc_head:
                rG, rD, rI, rGs = misc.load_pkl(resume_pkl)
            elif use_vc_head_with_cls:
                rG, rD, rI, rI_info, rGs = misc.load_pkl(resume_pkl)
            else:
                rG, rD, rGs = misc.load_pkl(resume_pkl)
            if resume_with_new_nets:
                G.copy_vars_from(rG)
                D.copy_vars_from(rD)
                if use_info_gan or use_vc_head or use_vc_head_with_cls:
                    I.copy_vars_from(rI)
                    if use_vc_head_with_cls:
                        I_info.copy_vars_from(rI_info)
                Gs.copy_vars_from(rGs)
            else:
                G = rG
                D = rD
                if use_info_gan or use_vc_head or use_vc_head_with_cls:
                    I = rI
                    if use_vc_head_with_cls:
                        I_info = rI_info
                Gs = rGs

    # Print layers and generate initial image snapshot.
    G.print_layers()
    D.print_layers()
    if use_info_gan or use_vc_head or use_vc_head_with_cls:
        I.print_layers()
        if use_vc_head_with_cls:
            I_info.print_layers()
    # pdb.set_trace()
    sched = training_schedule(cur_nimg=total_kimg * 1000,
                              training_set=training_set,
                              **sched_args)
    if traversal_grid:
        grid_size, grid_latents, grid_labels = get_grid_latents(
            n_discrete, n_continuous, n_samples_per, G, grid_labels)
    else:
        grid_latents = np.random.randn(np.prod(grid_size), *G.input_shape[1:])
    print('grid_latents.shape:', grid_latents.shape)
    print('grid_labels.shape:', grid_labels.shape)
    # pdb.set_trace()
    grid_fakes, _ = Gs.run(grid_latents,
                           grid_labels,
                           is_validation=True,
                           minibatch_size=sched.minibatch_gpu,
                           randomize_noise=False)
    misc.save_image_grid(grid_fakes,
                         dnnlib.make_run_dir_path('fakes_init.png'),
                         drange=drange_net,
                         grid_size=grid_size)

    # Setup training inputs.
    print('Building TensorFlow graph...')
    with tf.name_scope('Inputs'), tf.device('/cpu:0'):
        lod_in = tf.placeholder(tf.float32, name='lod_in', shape=[])
        lrate_in = tf.placeholder(tf.float32, name='lrate_in', shape=[])
        minibatch_size_in = tf.placeholder(tf.int32,
                                           name='minibatch_size_in',
                                           shape=[])
        minibatch_gpu_in = tf.placeholder(tf.int32,
                                          name='minibatch_gpu_in',
                                          shape=[])
        minibatch_multiplier = minibatch_size_in // (minibatch_gpu_in *
                                                     num_gpus)
        Gs_beta = 0.5**tf.div(tf.cast(minibatch_size_in,
                                      tf.float32), G_smoothing_kimg *
                              1000.0) if G_smoothing_kimg > 0.0 else 0.0

    # Setup optimizers.
    G_opt_args = dict(G_opt_args)
    D_opt_args = dict(D_opt_args)
    for args, reg_interval in [(G_opt_args, G_reg_interval),
                               (D_opt_args, D_reg_interval)]:
        args['minibatch_multiplier'] = minibatch_multiplier
        args['learning_rate'] = lrate_in
        if lazy_regularization:
            mb_ratio = reg_interval / (reg_interval + 1)
            args['learning_rate'] *= mb_ratio
            if 'beta1' in args: args['beta1'] **= mb_ratio
            if 'beta2' in args: args['beta2'] **= mb_ratio
    G_opt = tflib.Optimizer(name='TrainG', **G_opt_args)
    D_opt = tflib.Optimizer(name='TrainD', **D_opt_args)
    G_reg_opt = tflib.Optimizer(name='RegG', share=G_opt, **G_opt_args)
    D_reg_opt = tflib.Optimizer(name='RegD', share=D_opt, **D_opt_args)

    # Build training graph for each GPU.
    data_fetch_ops = []
    for gpu in range(num_gpus):
        with tf.name_scope('GPU%d' % gpu), tf.device('/gpu:%d' % gpu):

            # Create GPU-specific shadow copies of G and D.
            G_gpu = G if gpu == 0 else G.clone(G.name + '_shadow')
            D_gpu = D if gpu == 0 else D.clone(D.name + '_shadow')
            if use_info_gan or use_vc_head or use_vc_head_with_cls:
                I_gpu = I if gpu == 0 else I.clone(I.name + '_shadow')
                if use_vc_head_with_cls:
                    I_info_gpu = I_info if gpu == 0 else I_info.clone(
                        I_info.name + '_shadow')

            # Fetch training data via temporary variables.
            with tf.name_scope('DataFetch'):
                sched = training_schedule(cur_nimg=int(resume_kimg * 1000),
                                          training_set=training_set,
                                          **sched_args)
                reals_var = tf.Variable(
                    name='reals',
                    trainable=False,
                    initial_value=tf.zeros([sched.minibatch_gpu] +
                                           training_set.shape))
                labels_var = tf.Variable(name='labels',
                                         trainable=False,
                                         initial_value=tf.zeros([
                                             sched.minibatch_gpu,
                                             training_set.label_size
                                         ]))
                reals_write, labels_write = training_set.get_minibatch_tf()
                reals_write, labels_write = process_reals(
                    reals_write, labels_write, lod_in, mirror_augment,
                    training_set.dynamic_range, drange_net)
                reals_write = tf.concat(
                    [reals_write, reals_var[minibatch_gpu_in:]], axis=0)
                labels_write = tf.concat(
                    [labels_write, labels_var[minibatch_gpu_in:]], axis=0)
                data_fetch_ops += [tf.assign(reals_var, reals_write)]
                data_fetch_ops += [tf.assign(labels_var, labels_write)]
                reals_read = reals_var[:minibatch_gpu_in]
                labels_read = labels_var[:minibatch_gpu_in]

            # Evaluate loss functions.
            lod_assign_ops = []
            if 'lod' in G_gpu.vars:
                lod_assign_ops += [tf.assign(G_gpu.vars['lod'], lod_in)]
            if 'lod' in D_gpu.vars:
                lod_assign_ops += [tf.assign(D_gpu.vars['lod'], lod_in)]
            with tf.control_dependencies(lod_assign_ops):
                with tf.name_scope('G_loss'):
                    if use_info_gan or use_vc_head:
                        G_loss, G_reg, I_loss, _ = dnnlib.util.call_func_by_name(
                            G=G_gpu,
                            D=D_gpu,
                            I=I_gpu,
                            opt=G_opt,
                            training_set=training_set,
                            minibatch_size=minibatch_gpu_in,
                            **G_loss_args)
                    elif use_vc_head_with_cls:
                        G_loss, G_reg, I_loss, I_info_loss = dnnlib.util.call_func_by_name(
                            G=G_gpu,
                            D=D_gpu,
                            I=I_gpu,
                            I_info=I_info_gpu,
                            opt=G_opt,
                            training_set=training_set,
                            minibatch_size=minibatch_gpu_in,
                            **G_loss_args)
                    else:
                        G_loss, G_reg = dnnlib.util.call_func_by_name(
                            G=G_gpu,
                            D=D_gpu,
                            opt=G_opt,
                            training_set=training_set,
                            minibatch_size=minibatch_gpu_in,
                            **G_loss_args)
                with tf.name_scope('D_loss'):
                    D_loss, D_reg = dnnlib.util.call_func_by_name(
                        G=G_gpu,
                        D=D_gpu,
                        opt=D_opt,
                        training_set=training_set,
                        minibatch_size=minibatch_gpu_in,
                        reals=reals_read,
                        labels=labels_read,
                        **D_loss_args)

            # Register gradients.
            if not lazy_regularization:
                if G_reg is not None: G_loss += G_reg
                if D_reg is not None: D_loss += D_reg
            else:
                if G_reg is not None:
                    G_reg_opt.register_gradients(
                        tf.reduce_mean(G_reg * G_reg_interval),
                        G_gpu.trainables)
                if D_reg is not None:
                    D_reg_opt.register_gradients(
                        tf.reduce_mean(D_reg * D_reg_interval),
                        D_gpu.trainables)
            # print('G_gpu.trainables:', G_gpu.trainables)
            # print('D_gpu.trainables:', D_gpu.trainables)
            # print('I_gpu.trainables:', I_gpu.trainables)
            if use_info_gan or use_vc_head:
                GI_gpu_trainables = collections.OrderedDict(
                    list(G_gpu.trainables.items()) +
                    list(I_gpu.trainables.items()))
                G_opt.register_gradients(tf.reduce_mean(G_loss + I_loss),
                                         GI_gpu_trainables)
                D_opt.register_gradients(tf.reduce_mean(D_loss),
                                         D_gpu.trainables)
                # G_opt.register_gradients(tf.reduce_mean(I_loss),
                # GI_gpu_trainables)
                # D_opt.register_gradients(tf.reduce_mean(I_loss),
                # D_gpu.trainables)
            elif use_vc_head_with_cls:
                GIIinfo_gpu_trainables = collections.OrderedDict(
                    list(G_gpu.trainables.items()) +
                    list(I_gpu.trainables.items()) +
                    list(I_info_gpu.trainables.items()))
                G_opt.register_gradients(
                    tf.reduce_mean(G_loss + I_loss + I_info_loss),
                    GIIinfo_gpu_trainables)
                D_opt.register_gradients(tf.reduce_mean(D_loss),
                                         D_gpu.trainables)
            else:
                G_opt.register_gradients(tf.reduce_mean(G_loss),
                                         G_gpu.trainables)
                D_opt.register_gradients(tf.reduce_mean(D_loss),
                                         D_gpu.trainables)

            # if use_info_gan:
            # # INFO-GAN-HEAD loss
            # G_opt.register_gradients(tf.reduce_mean(I_loss),
            # G_gpu.trainables)
            # G_opt.register_gradients(tf.reduce_mean(I_loss),
            # I_gpu.trainables)
            # D_opt.register_gradients(tf.reduce_mean(I_loss),
            # D_gpu.trainables)

    # Setup training ops.
    data_fetch_op = tf.group(*data_fetch_ops)
    G_train_op = G_opt.apply_updates()
    D_train_op = D_opt.apply_updates()
    G_reg_op = G_reg_opt.apply_updates(allow_no_op=True)
    D_reg_op = D_reg_opt.apply_updates(allow_no_op=True)
    Gs_update_op = Gs.setup_as_moving_average_of(G, beta=Gs_beta)

    # Finalize graph.
    with tf.device('/gpu:0'):
        try:
            peak_gpu_mem_op = tf.contrib.memory_stats.MaxBytesInUse()
        except tf.errors.NotFoundError:
            peak_gpu_mem_op = tf.constant(0)
    tflib.init_uninitialized_vars()

    print('Initializing logs...')
    summary_log = tf.summary.FileWriter(dnnlib.make_run_dir_path())
    if save_tf_graph:
        summary_log.add_graph(tf.get_default_graph())
    if save_weight_histograms:
        G.setup_weight_histograms()
        D.setup_weight_histograms()
        if use_info_gan or use_vc_head or use_vc_head_with_cls:
            I.setup_weight_histograms()
            if use_vc_head_with_cls:
                I_info.setup_weight_histograms()
    metrics = metric_base.MetricGroup(metric_arg_list)

    print('Training for %d kimg...\n' % total_kimg)
    dnnlib.RunContext.get().update('',
                                   cur_epoch=resume_kimg,
                                   max_epoch=total_kimg)
    maintenance_time = dnnlib.RunContext.get().get_last_update_interval()
    cur_nimg = int(resume_kimg * 1000)
    cur_tick = -1
    tick_start_nimg = cur_nimg
    prev_lod = -1.0
    running_mb_counter = 0
    while cur_nimg < total_kimg * 1000:
        if dnnlib.RunContext.get().should_stop(): break

        # Choose training parameters and configure training ops.
        sched = training_schedule(cur_nimg=cur_nimg,
                                  training_set=training_set,
                                  **sched_args)
        assert sched.minibatch_size % (sched.minibatch_gpu * num_gpus) == 0
        training_set.configure(sched.minibatch_gpu, sched.lod)
        if reset_opt_for_new_lod:
            if np.floor(sched.lod) != np.floor(prev_lod) or np.ceil(
                    sched.lod) != np.ceil(prev_lod):
                G_opt.reset_optimizer_state()
                D_opt.reset_optimizer_state()
        prev_lod = sched.lod

        # Run training ops.
        feed_dict = {
            lod_in: sched.lod,
            lrate_in: sched.G_lrate,
            minibatch_size_in: sched.minibatch_size,
            minibatch_gpu_in: sched.minibatch_gpu
        }
        for _repeat in range(minibatch_repeats):
            rounds = range(0, sched.minibatch_size,
                           sched.minibatch_gpu * num_gpus)
            run_G_reg = (lazy_regularization
                         and running_mb_counter % G_reg_interval == 0)
            run_D_reg = (lazy_regularization
                         and running_mb_counter % D_reg_interval == 0)
            cur_nimg += sched.minibatch_size
            running_mb_counter += 1

            # Fast path without gradient accumulation.
            if len(rounds) == 1:
                tflib.run([G_train_op, data_fetch_op], feed_dict)
                if run_G_reg:
                    tflib.run(G_reg_op, feed_dict)
                tflib.run([D_train_op, Gs_update_op], feed_dict)
                if run_D_reg:
                    tflib.run(D_reg_op, feed_dict)

            # Slow path with gradient accumulation.
            else:
                for _round in rounds:
                    tflib.run(G_train_op, feed_dict)
                if run_G_reg:
                    for _round in rounds:
                        tflib.run(G_reg_op, feed_dict)
                tflib.run(Gs_update_op, feed_dict)
                for _round in rounds:
                    tflib.run(data_fetch_op, feed_dict)
                    tflib.run(D_train_op, feed_dict)
                if run_D_reg:
                    for _round in rounds:
                        tflib.run(D_reg_op, feed_dict)

        # Perform maintenance tasks once per tick.
        done = (cur_nimg >= total_kimg * 1000)
        if cur_tick < 0 or cur_nimg >= tick_start_nimg + sched.tick_kimg * 1000 or done:
            cur_tick += 1
            tick_kimg = (cur_nimg - tick_start_nimg) / 1000.0
            tick_start_nimg = cur_nimg
            tick_time = dnnlib.RunContext.get().get_time_since_last_update()
            total_time = dnnlib.RunContext.get().get_time_since_start(
            ) + resume_time

            # Report progress.
            print(
                'tick %-5d kimg %-8.1f lod %-5.2f minibatch %-4d time %-12s sec/tick %-7.1f sec/kimg %-7.2f maintenance %-6.1f gpumem %.1f'
                % (autosummary('Progress/tick', cur_tick),
                   autosummary('Progress/kimg', cur_nimg / 1000.0),
                   autosummary('Progress/lod', sched.lod),
                   autosummary('Progress/minibatch', sched.minibatch_size),
                   dnnlib.util.format_time(
                       autosummary('Timing/total_sec', total_time)),
                   autosummary('Timing/sec_per_tick', tick_time),
                   autosummary('Timing/sec_per_kimg', tick_time / tick_kimg),
                   autosummary('Timing/maintenance_sec', maintenance_time),
                   autosummary('Resources/peak_gpu_mem_gb',
                               peak_gpu_mem_op.eval() / 2**30)))
            autosummary('Timing/total_hours', total_time / (60.0 * 60.0))
            autosummary('Timing/total_days', total_time / (24.0 * 60.0 * 60.0))

            # Save snapshots.
            if image_snapshot_ticks is not None and (
                    cur_tick % image_snapshot_ticks == 0 or done):
                grid_fakes, _ = Gs.run(grid_latents,
                                       grid_labels,
                                       is_validation=True,
                                       minibatch_size=sched.minibatch_gpu,
                                       randomize_noise=False)
                misc.save_image_grid(grid_fakes,
                                     dnnlib.make_run_dir_path(
                                         'fakes%06d.png' % (cur_nimg // 1000)),
                                     drange=drange_net,
                                     grid_size=grid_size)
            if network_snapshot_ticks is not None and (
                    cur_tick % network_snapshot_ticks == 0 or done):
                pkl = dnnlib.make_run_dir_path('network-snapshot-%06d.pkl' %
                                               (cur_nimg // 1000))
                if use_info_gan or use_vc_head:
                    misc.save_pkl((G, D, I, Gs), pkl)
                elif use_vc_head_with_cls:
                    misc.save_pkl((G, D, I, I_info, Gs), pkl)
                else:
                    misc.save_pkl((G, D, Gs), pkl)
                metrics.run(pkl,
                            run_dir=dnnlib.make_run_dir_path(),
                            data_dir=dnnlib.convert_path(data_dir),
                            num_gpus=num_gpus,
                            tf_config=tf_config)

            # Update summaries and RunContext.
            metrics.update_autosummaries()
            tflib.autosummary.save_summaries(summary_log, cur_nimg)
            dnnlib.RunContext.get().update('%.2f' % sched.lod,
                                           cur_epoch=cur_nimg // 1000,
                                           max_epoch=total_kimg)
            maintenance_time = dnnlib.RunContext.get(
            ).get_last_update_interval() - tick_time

    # Save final snapshot.
    if use_info_gan or use_vc_head:
        misc.save_pkl((G, D, I, Gs),
                      dnnlib.make_run_dir_path('network-final.pkl'))
    elif use_vc_head_with_cls:
        misc.save_pkl((G, D, I, I_info, Gs),
                      dnnlib.make_run_dir_path('network-final.pkl'))
    else:
        misc.save_pkl((G, D, Gs),
                      dnnlib.make_run_dir_path('network-final.pkl'))

    # All done.
    summary_log.close()
    training_set.close()
Ejemplo n.º 11
0
def training_loop(
        run_dir='.',  # Output directory.
        G_args={},  # Options for generator network.
        D_args={},  # Options for discriminator network.
        G_opt_args={},  # Options for generator optimizer.
        D_opt_args={},  # Options for discriminator optimizer.
        loss_args={},  # Options for loss function.
        train_dataset_args={},  # Options for dataset to train with.
        # Options for dataset to evaluate metrics against.
    metric_dataset_args={},
        augment_args={},  # Options for adaptive augmentations.
        metric_arg_list=[],  # Metrics to evaluate during training.
        num_gpus=1,  # Number of GPUs to use.
        minibatch_size=32,  # Global minibatch size.
        minibatch_gpu=4,  # Number of samples processed at a time by one GPU.
        # Half-life of the exponential moving average (EMA) of generator weights.
    G_smoothing_kimg=10,
        G_smoothing_rampup=None,  # EMA ramp-up coefficient.
        # Number of minibatches to run in the inner loop.
    minibatch_repeats=4,
        lazy_regularization=True,  # Perform regularization as a separate training step?
        # How often the perform regularization for G? Ignored if lazy_regularization=False.
    G_reg_interval=4,
        # How often the perform regularization for D? Ignored if lazy_regularization=False.
        D_reg_interval=16,
        # Total length of the training, measured in thousands of real images.
        total_kimg=25000,
        kimg_per_tick=4,  # Progress snapshot interval.
        # How often to save image snapshots? None = only save 'reals.png' and 'fakes-init.png'.
    image_snapshot_ticks=50,
        # How often to save network snapshots? None = only save 'networks-final.pkl'.
        network_snapshot_ticks=50,
        resume_pkl=None,  # Network pickle to resume training from.
        # Callback function for determining whether to abort training.
    abort_fn=None,
        progress_fn=None,  # Callback function for updating training progress.
):
    assert minibatch_size % (num_gpus * minibatch_gpu) == 0
    start_time = time.time()

    print('Loading training set...')
    training_set = dataset.load_dataset(**train_dataset_args)
    print('Image shape:', np.int32(training_set.shape).tolist())
    print('Label shape:', [training_set.label_size])
    print()

    print('Constructing networks...')
    with tf.device('/gpu:0'):
        G = tflib.Network('G',
                          num_channels=training_set.shape[0],
                          resolution=training_set.shape[1],
                          label_size=training_set.label_size,
                          **G_args)
        D = tflib.Network('D',
                          num_channels=training_set.shape[0],
                          resolution=training_set.shape[1],
                          label_size=training_set.label_size,
                          **D_args)
        Gs = G.clone('Gs')
        if resume_pkl is not None:
            print(f'Resuming from "{resume_pkl}"')
            with dnnlib.util.open_url(resume_pkl) as f:
                rG, rD, rGs = pickle.load(f)
            G.copy_vars_from(rG)
            D.copy_vars_from(rD)
            Gs.copy_vars_from(rGs)
    G.print_layers()
    D.print_layers()

    print('Exporting sample images...')
    grid_size, grid_reals, grid_labels = setup_snapshot_image_grid(
        training_set)
    save_image_grid(grid_reals,
                    os.path.join(run_dir, 'reals.png'),
                    drange=[0, 255],
                    grid_size=grid_size)
    grid_latents = np.random.randn(np.prod(grid_size), *G.input_shape[1:])
    grid_fakes = Gs.run(grid_latents,
                        grid_labels,
                        is_validation=True,
                        minibatch_size=minibatch_gpu)
    # save_image_grid(grid_fakes, os.path.join(
    #     run_dir, 'fakes_init.png'), drange=[-1, 1], grid_size=grid_size)

    print(f'Replicating networks across {num_gpus} GPUs...')
    G_gpus = [G]
    D_gpus = [D]
    for gpu in range(1, num_gpus):
        with tf.device(f'/gpu:{gpu}'):
            G_gpus.append(G.clone(f'{G.name}_gpu{gpu}'))
            D_gpus.append(D.clone(f'{D.name}_gpu{gpu}'))

    print('Initializing augmentations...')
    aug = None
    if augment_args.get('class_name', None) is not None:
        aug = dnnlib.util.construct_class_by_name(**augment_args)
        aug.init_validation_set(D_gpus=D_gpus, training_set=training_set)

    print('Setting up optimizers...')
    G_opt_args = dict(G_opt_args)
    D_opt_args = dict(D_opt_args)
    for args, reg_interval in [(G_opt_args, G_reg_interval),
                               (D_opt_args, D_reg_interval)]:
        args[
            'minibatch_multiplier'] = minibatch_size // num_gpus // minibatch_gpu
        if lazy_regularization:
            mb_ratio = reg_interval / (reg_interval + 1)
            args['learning_rate'] *= mb_ratio
            if 'beta1' in args:
                args['beta1'] **= mb_ratio
            if 'beta2' in args:
                args['beta2'] **= mb_ratio
    G_opt = tflib.Optimizer(name='TrainG', **G_opt_args)
    D_opt = tflib.Optimizer(name='TrainD', **D_opt_args)
    G_reg_opt = tflib.Optimizer(name='RegG', share=G_opt, **G_opt_args)
    D_reg_opt = tflib.Optimizer(name='RegD', share=D_opt, **D_opt_args)

    print('Constructing training graph...')
    data_fetch_ops = []
    training_set.configure(minibatch_gpu)
    for gpu, (G_gpu, D_gpu) in enumerate(zip(G_gpus, D_gpus)):
        with tf.name_scope(f'Train_gpu{gpu}'), tf.device(f'/gpu:{gpu}'):

            # Fetch training data via temporary variables.
            with tf.name_scope('DataFetch'):
                real_images_var = tf.Variable(
                    name='images',
                    trainable=False,
                    initial_value=tf.zeros([minibatch_gpu] +
                                           training_set.shape))
                real_labels_var = tf.Variable(name='labels',
                                              trainable=False,
                                              initial_value=tf.zeros([
                                                  minibatch_gpu,
                                                  training_set.label_size
                                              ]))
                real_images_write, real_labels_write = training_set.get_minibatch_tf(
                )
                real_images_write = tflib.convert_images_from_uint8(
                    real_images_write)
                data_fetch_ops += [
                    tf.assign(real_images_var, real_images_write)
                ]
                data_fetch_ops += [
                    tf.assign(real_labels_var, real_labels_write)
                ]

            # Evaluate loss function and register gradients.
            fake_labels = training_set.get_random_labels_tf(minibatch_gpu)
            terms = dnnlib.util.call_func_by_name(G=G_gpu,
                                                  D=D_gpu,
                                                  aug=aug,
                                                  fake_labels=fake_labels,
                                                  real_images=real_images_var,
                                                  real_labels=real_labels_var,
                                                  **loss_args)
            if lazy_regularization:
                if terms.G_reg is not None:
                    G_reg_opt.register_gradients(
                        tf.reduce_mean(terms.G_reg * G_reg_interval),
                        G_gpu.trainables)
                if terms.D_reg is not None:
                    D_reg_opt.register_gradients(
                        tf.reduce_mean(terms.D_reg * D_reg_interval),
                        D_gpu.trainables)
            else:
                if terms.G_reg is not None:
                    terms.G_loss += terms.G_reg
                if terms.D_reg is not None:
                    terms.D_loss += terms.D_reg
            G_opt.register_gradients(tf.reduce_mean(terms.G_loss),
                                     G_gpu.trainables)
            D_opt.register_gradients(tf.reduce_mean(terms.D_loss),
                                     D_gpu.trainables)

    print('Finalizing training ops...')
    data_fetch_op = tf.group(*data_fetch_ops)
    G_train_op = G_opt.apply_updates()
    D_train_op = D_opt.apply_updates()
    G_reg_op = G_reg_opt.apply_updates(allow_no_op=True)
    D_reg_op = D_reg_opt.apply_updates(allow_no_op=True)
    Gs_beta_in = tf.placeholder(tf.float32, name='Gs_beta_in', shape=[])
    Gs_update_op = Gs.setup_as_moving_average_of(G, beta=Gs_beta_in)
    tflib.init_uninitialized_vars()
    with tf.device('/gpu:0'):
        peak_gpu_mem_op = tf.contrib.memory_stats.MaxBytesInUse()

    print('Initializing metrics...')
    summary_log = tf.summary.FileWriter(run_dir)
    metrics = []
    for args in metric_arg_list:
        metric = dnnlib.util.construct_class_by_name(**args)
        metric.configure(dataset_args=metric_dataset_args, run_dir=run_dir)
        metrics.append(metric)

    print(f'Training for {total_kimg} kimg...')
    print()
    if progress_fn is not None:
        progress_fn(0, total_kimg)
    tick_start_time = time.time()
    maintenance_time = tick_start_time - start_time
    cur_nimg = 0
    cur_tick = -1
    tick_start_nimg = cur_nimg
    running_mb_counter = 0

    done = False
    while not done:

        # Compute EMA decay parameter.
        Gs_nimg = G_smoothing_kimg * 1000.0
        if G_smoothing_rampup is not None:
            Gs_nimg = min(Gs_nimg, cur_nimg * G_smoothing_rampup)
        Gs_beta = 0.5**(minibatch_size / max(Gs_nimg, 1e-8))

        # Run training ops.
        for _repeat_idx in range(minibatch_repeats):
            rounds = range(0, minibatch_size, minibatch_gpu * num_gpus)
            run_G_reg = (lazy_regularization
                         and running_mb_counter % G_reg_interval == 0)
            run_D_reg = (lazy_regularization
                         and running_mb_counter % D_reg_interval == 0)
            cur_nimg += minibatch_size
            running_mb_counter += 1

            # Fast path without gradient accumulation.
            if len(rounds) == 1:
                tflib.run([G_train_op, data_fetch_op])
                if run_G_reg:
                    tflib.run(G_reg_op)
                tflib.run([D_train_op, Gs_update_op], {Gs_beta_in: Gs_beta})
                if run_D_reg:
                    tflib.run(D_reg_op)

            # Slow path with gradient accumulation.
            else:
                for _round in rounds:
                    tflib.run(G_train_op)
                    if run_G_reg:
                        tflib.run(G_reg_op)
                tflib.run(Gs_update_op, {Gs_beta_in: Gs_beta})
                for _round in rounds:
                    tflib.run(data_fetch_op)
                    tflib.run(D_train_op)
                    if run_D_reg:
                        tflib.run(D_reg_op)

            # Run validation.
            if aug is not None:
                aug.run_validation(minibatch_size=minibatch_size)

        # Tune augmentation parameters.
        if aug is not None:
            aug.tune(minibatch_size * minibatch_repeats)

        # Perform maintenance tasks once per tick.
        done = (cur_nimg >= total_kimg * 1000) or (abort_fn is not None
                                                   and abort_fn())
        if done or cur_tick < 0 or cur_nimg >= tick_start_nimg + kimg_per_tick * 1000:
            cur_tick += 1
            tick_kimg = (cur_nimg - tick_start_nimg) / 1000.0
            tick_start_nimg = cur_nimg
            tick_end_time = time.time()
            total_time = tick_end_time - start_time
            tick_time = tick_end_time - tick_start_time

            # Report progress.
            print(' '.join([
                f"tick {autosummary('Progress/tick', cur_tick):<5d}",
                f"kimg {autosummary('Progress/kimg', cur_nimg / 1000.0):<8.1f}",
                f"time {dnnlib.util.format_time(autosummary('Timing/total_sec', total_time)):<12s}",
                f"sec/tick {autosummary('Timing/sec_per_tick', tick_time):<7.1f}",
                f"sec/kimg {autosummary('Timing/sec_per_kimg', tick_time / tick_kimg):<7.2f}",
                f"maintenance {autosummary('Timing/maintenance_sec', maintenance_time):<6.1f}",
                f"gpumem {autosummary('Resources/peak_gpu_mem_gb', peak_gpu_mem_op.eval() / 2**30):<5.1f}",
                f"augment {autosummary('Progress/augment', aug.strength if aug is not None else 0):.3f}",
            ]))
            autosummary('Timing/total_hours', total_time / (60.0 * 60.0))
            autosummary('Timing/total_days', total_time / (24.0 * 60.0 * 60.0))
            if progress_fn is not None:
                progress_fn(cur_nimg // 1000, total_kimg)

            # Save snapshots.
            if image_snapshot_ticks is not None and (
                    done or cur_tick % image_snapshot_ticks == 0):
                grid_fakes = Gs.run(grid_latents,
                                    grid_labels,
                                    is_validation=True,
                                    minibatch_size=minibatch_gpu)
                save_image_grid(grid_fakes,
                                os.path.join(
                                    run_dir,
                                    f'fakes{cur_nimg // 1000:06d}.png'),
                                drange=[-1, 1],
                                grid_size=grid_size)
            if network_snapshot_ticks is not None and (
                    done or cur_tick % network_snapshot_ticks == 0):
                pkl = os.path.join(
                    run_dir, f'network-snapshot-{cur_nimg // 1000:06d}.pkl')
                with open(pkl, 'wb') as f:
                    pickle.dump((G, D, Gs), f)
                if len(metrics):
                    print('Evaluating metrics...')
                    for metric in metrics:
                        metric.run(pkl, num_gpus=num_gpus)

            # Update summaries.
            for metric in metrics:
                metric.update_autosummaries()
            tflib.autosummary.save_summaries(summary_log, cur_nimg)
            tick_start_time = time.time()
            maintenance_time = tick_start_time - tick_end_time

    print()
    print('Exiting...')
    summary_log.close()
    training_set.close()
Ejemplo n.º 12
0
def training_loop(
    G_args={},  # Options for generator network.
    D_args={},  # Options for discriminator network.
    G_opt_args={},  # Options for generator optimizer.
    D_opt_args={},  # Options for discriminator optimizer.
    AE_opt_args=None,  # Options for autoencoder optimizer.
    G_loss_args={},  # Options for generator loss.
    D_loss_args={},  # Options for discriminator loss.
    AE_loss_args=None,  # Options for autoencoder loss.
    dataset_args={},  # Options for dataset.load_dataset().
    dataset_args_eval={},  # Options for dataset.load_dataset().
    sched_args={},  # Options for train.TrainingSchedule.
    grid_args={},  # Options for train.setup_snapshot_image_grid().
    metric_arg_list=[],  # Options for MetricGroup.
    tf_config={},  # Options for tflib.init_tf().
    train_data_dir=None,  # Directory to load datasets from.
    eval_data_dir=None,  # Directory to load datasets from.
    G_smoothing_kimg=10.0,  # Half-life of the running average of generator weights.
    minibatch_repeats=4,  # Number of minibatches to run before adjusting training parameters.
    lazy_regularization=True,  # Perform regularization as a separate training step?
    G_reg_interval=4,  # How often the perform regularization for G? Ignored if lazy_regularization=False.
    D_reg_interval=16,  # How often the perform regularization for D? Ignored if lazy_regularization=False.
    reset_opt_for_new_lod=True,  # Reset optimizer internal state (e.g. Adam moments) when new layers are introduced?
    total_kimg=25000,  # Total length of the training, measured in thousands of real images.
    mirror_augment=False,  # Enable mirror augment?
    drange_net=[
        -1, 1
    ],  # Dynamic range used when feeding image data to the networks.
    image_snapshot_ticks=50,  # How often to save image snapshots? None = only save 'reals.png' and 'fakes-init.png'.
    network_snapshot_ticks=50,  # How often to save network snapshots? None = only save 'networks-final.pkl'.
    save_tf_graph=True,  # Include full TensorFlow computation graph in the tfevents file?
    save_weight_histograms=True,  # Include weight histograms in the tfevents file?
    resume_pkl=None,  # Network pickle to resume training from, None = train from scratch.
    resume_kimg=0.0,  # Assumed training progress at the beginning. Affects reporting and training schedule.
    resume_time=0.0,  # Assumed wallclock time at the beginning. Affects reporting.
    resume_with_new_nets=False,
    resume_with_own_vars=False
):  # Construct new networks according to G_args and D_args before resuming training?

    # Initialize dnnlib and TensorFlow.
    tflib.init_tf(tf_config)
    num_gpus = dnnlib.submit_config.num_gpus

    # Load training set.
    print("Loading train set from %s..." % dataset_args.tfrecord_dir)
    training_set = dataset.load_dataset(
        data_dir=dnnlib.convert_path(train_data_dir),
        verbose=True,
        **dataset_args)
    print("Loading eval set from %s..." % dataset_args_eval.tfrecord_dir)
    eval_set = dataset.load_dataset(
        data_dir=dnnlib.convert_path(eval_data_dir),
        verbose=True,
        **dataset_args_eval)
    grid_size, grid_reals, grid_labels = misc.setup_snapshot_image_grid(
        training_set, **grid_args)
    misc.save_image_grid(grid_reals,
                         dnnlib.make_run_dir_path('reals.png'),
                         drange=training_set.dynamic_range,
                         grid_size=grid_size)
    # Freeze Discriminator
    if D_args['freeze']:
        num_layers = np.log2(training_set.resolution) - 1
        layers = int(np.round(num_layers * 3. / 8.))
        scope = ['Output', 'scores_out']
        for layer in range(layers):
            scope += ['.*%d' % 2**layer]
            if 'train_scope' in D_args:
                scope[-1] += '.*%d' % D_args['train_scope']
        D_args['train_scope'] = scope

    # Construct or load networks.
    with tf.device('/gpu:0'):
        if resume_pkl is '' or resume_with_new_nets or resume_with_own_vars:
            print('Constructing networks...')
            G = tflib.Network('G',
                              num_channels=training_set.shape[0],
                              resolution=training_set.shape[1],
                              label_size=training_set.label_size,
                              **G_args)
            D = tflib.Network('D',
                              num_channels=training_set.shape[0],
                              resolution=training_set.shape[1],
                              label_size=training_set.label_size,
                              **D_args)
            Gs = G.clone('Gs')
        if resume_pkl is not '':
            print('Loading networks from "%s"...' % resume_pkl)
            rG, rD, rGs = misc.load_pkl(resume_pkl)
            if resume_with_new_nets:
                G.copy_vars_from(rG)
                D.copy_vars_from(rD)
                Gs.copy_vars_from(rGs)
            else:
                G = rG
                D = rD
                Gs = rGs

    grid_latents = np.random.randn(np.prod(grid_size), *G.input_shape[1:])
    # SVD stuff
    if 'syn_svd' in G_args or 'map_svd' in G_args:
        # Run graph to calculate SVD
        grid_latents_smol = grid_latents[:1]
        rho = np.array([1])
        grid_fakes = G.run(grid_latents_smol,
                           grid_labels,
                           rho,
                           is_validation=True)
        grid_fakes = Gs.run(grid_latents_smol,
                            grid_labels,
                            rho,
                            is_validation=True)
        load_d_fake = D.run(grid_reals[:1], rho, is_validation=True)
        with tf.device('/gpu:0'):
            # Create SVD-decomposed graph
            rG, rD, rGs = G, D, Gs
            G_lambda_mask = {
                var: np.ones(G.vars[var].shape[-1])
                for var in G.vars if 'SVD/s' in var
            }
            D_lambda_mask = {
                'D/' + var: np.ones(D.vars[var].shape[-1])
                for var in D.vars if 'SVD/s' in var
            }
            G_reduce_dims = {
                var: (0, int(Gs.vars[var].shape[-1]))
                for var in Gs.vars if 'SVD/s' in var
            }
            G_args['lambda_mask'] = G_lambda_mask
            G_args['reduce_dims'] = G_reduce_dims
            D_args['lambda_mask'] = D_lambda_mask

            # Create graph with no SVD operations
            G = tflib.Network('G',
                              num_channels=training_set.shape[0],
                              resolution=training_set.shape[1],
                              label_size=rG.input_shapes[1][1],
                              factorized=True,
                              **G_args)
            D = tflib.Network('D',
                              num_channels=training_set.shape[0],
                              resolution=training_set.shape[1],
                              label_size=rD.input_shapes[1][1],
                              factorized=True,
                              **D_args)
            Gs = G.clone('Gs')

            grid_fakes = G.run(grid_latents_smol,
                               grid_labels,
                               rho,
                               is_validation=True,
                               minibatch_size=1)
            grid_fakes = Gs.run(grid_latents_smol,
                                grid_labels,
                                rho,
                                is_validation=True,
                                minibatch_size=1)

            G.copy_vars_from(rG)
            D.copy_vars_from(rD)
            Gs.copy_vars_from(rGs)

    # Reduce per-gpu minibatch size to fit in 16GB GPU memory
    if grid_reals.shape[2] >= 1024:
        sched_args.minibatch_gpu_base = 2
    print('Batch size', sched_args.minibatch_gpu_base)

    # Generate initial image snapshot.
    G.print_layers()
    D.print_layers()
    sched = training_schedule(cur_nimg=total_kimg * 1000,
                              training_set=training_set,
                              **sched_args)
    grid_latents = np.random.randn(np.prod(grid_size), *G.input_shape[1:])
    rho = np.array([1])
    grid_fakes = Gs.run(grid_latents,
                        grid_labels,
                        rho,
                        is_validation=True,
                        minibatch_size=sched.minibatch_gpu)
    misc.save_image_grid(grid_fakes,
                         dnnlib.make_run_dir_path('fakes_init.png'),
                         drange=drange_net,
                         grid_size=grid_size)
    if resume_pkl is not '':
        load_d_real = rD.run(grid_reals[:1], rho, is_validation=True)
        load_d_fake = rD.run(grid_fakes[:1], rho, is_validation=True)
        d_fake = D.run(grid_fakes[:1], rho, is_validation=True)
        d_real = D.run(grid_reals[:1], rho, is_validation=True)
        print('Factorized fake', d_fake, 'loaded fake', load_d_fake,
              'factorized real', d_real, 'loaded real', load_d_real)
        print('(should match)')
    # Setup training inputs.
    print('Building TensorFlow graph...')
    with tf.name_scope('Inputs'), tf.device('/cpu:0'):
        lod_in = tf.placeholder(tf.float32, name='lod_in', shape=[])
        lrate_in = tf.placeholder(tf.float32, name='lrate_in', shape=[])
        minibatch_size_in = tf.placeholder(tf.int32,
                                           name='minibatch_size_in',
                                           shape=[])
        minibatch_gpu_in = tf.placeholder(tf.int32,
                                          name='minibatch_gpu_in',
                                          shape=[])
        minibatch_multiplier = minibatch_size_in // (minibatch_gpu_in *
                                                     num_gpus)
        Gs_beta = 0.5**tf.div(tf.cast(minibatch_size_in,
                                      tf.float32), G_smoothing_kimg *
                              1000.0) if G_smoothing_kimg > 0.0 else 0.0

    # Setup optimizers.
    G_opt_args = dict(G_opt_args)
    D_opt_args = dict(D_opt_args)
    for args, reg_interval in [(G_opt_args, G_reg_interval),
                               (D_opt_args, D_reg_interval)]:
        args['minibatch_multiplier'] = minibatch_multiplier
        args['learning_rate'] = lrate_in
        if lazy_regularization:
            mb_ratio = reg_interval / (reg_interval + 1)
            args['learning_rate'] *= mb_ratio
            if 'beta1' in args: args['beta1'] **= mb_ratio
            if 'beta2' in args: args['beta2'] **= mb_ratio
    G_opt = tflib.Optimizer(name='TrainG', **G_opt_args)
    D_opt = tflib.Optimizer(name='TrainD', **D_opt_args)
    G_reg_opt = tflib.Optimizer(name='RegG', share=G_opt, **G_opt_args)
    D_reg_opt = tflib.Optimizer(name='RegD', share=D_opt, **D_opt_args)
    if AE_opt_args is not None:
        AE_opt_args = dict(AE_opt_args)
        AE_opt_args['minibatch_multiplier'] = minibatch_multiplier
        AE_opt_args['learning_rate'] = lrate_in
        AE_opt = tflib.Optimizer(name='TrainAE', **AE_opt_args)

    # Build training graph for each GPU.
    data_fetch_ops = []
    for gpu in range(num_gpus):
        with tf.name_scope('GPU%d' % gpu), tf.device('/gpu:%d' % gpu):

            # Create GPU-specific shadow copies of G and D.
            G_gpu = G if gpu == 0 else G.clone(G.name + '_shadow')
            D_gpu = D if gpu == 0 else D.clone(D.name + '_shadow')

            # Fetch training data via temporary variables.
            with tf.name_scope('DataFetch'):
                sched = training_schedule(cur_nimg=int(resume_kimg * 1000),
                                          training_set=training_set,
                                          **sched_args)
                reals_var = tf.Variable(
                    name='reals',
                    trainable=False,
                    initial_value=tf.zeros([sched.minibatch_gpu] +
                                           training_set.shape))
                labels_var = tf.Variable(name='labels',
                                         trainable=False,
                                         initial_value=tf.zeros([
                                             sched.minibatch_gpu,
                                             training_set.label_size
                                         ]))
                reals_write, labels_write = training_set.get_minibatch_tf()
                reals_write, labels_write = process_reals(
                    reals_write, labels_write, lod_in, mirror_augment,
                    training_set.dynamic_range, drange_net)
                reals_write = tf.concat(
                    [reals_write, reals_var[minibatch_gpu_in:]], axis=0)
                labels_write = tf.concat(
                    [labels_write, labels_var[minibatch_gpu_in:]], axis=0)
                data_fetch_ops += [tf.assign(reals_var, reals_write)]
                data_fetch_ops += [tf.assign(labels_var, labels_write)]
                reals_read = reals_var[:minibatch_gpu_in]
                labels_read = labels_var[:minibatch_gpu_in]

            # Evaluate loss functions.
            lod_assign_ops = []
            if 'lod' in G_gpu.vars:
                lod_assign_ops += [tf.assign(G_gpu.vars['lod'], lod_in)]
            if 'lod' in D_gpu.vars:
                lod_assign_ops += [tf.assign(D_gpu.vars['lod'], lod_in)]
            with tf.control_dependencies(lod_assign_ops):
                with tf.name_scope('G_loss'):
                    if G_loss_args['func_name'] == 'training.loss.G_l1':
                        G_loss_args['reals'] = reals_read
                    else:
                        G_loss, G_reg = dnnlib.util.call_func_by_name(
                            G=G_gpu,
                            D=D_gpu,
                            opt=G_opt,
                            training_set=training_set,
                            minibatch_size=minibatch_gpu_in,
                            **G_loss_args)
                with tf.name_scope('D_loss'):
                    D_loss, D_reg = dnnlib.util.call_func_by_name(
                        G=G_gpu,
                        D=D_gpu,
                        opt=D_opt,
                        training_set=training_set,
                        minibatch_size=minibatch_gpu_in,
                        reals=reals_read,
                        labels=labels_read,
                        **D_loss_args)

            # Register gradients.
            if not lazy_regularization:
                if G_reg is not None: G_loss += G_reg
                if D_reg is not None: D_loss += D_reg
            else:
                if G_reg is not None:
                    G_reg_opt.register_gradients(
                        tf.reduce_mean(G_reg * G_reg_interval),
                        G_gpu.trainables)
                if D_reg is not None:
                    D_reg_opt.register_gradients(
                        tf.reduce_mean(D_reg * D_reg_interval),
                        D_gpu.trainables)

            G_opt.register_gradients(tf.reduce_mean(G_loss), G_gpu.trainables)
            D_opt.register_gradients(tf.reduce_mean(D_loss), D_gpu.trainables)

    # Setup training ops.
    data_fetch_op = tf.group(*data_fetch_ops)
    G_train_op = G_opt.apply_updates()
    D_train_op = D_opt.apply_updates()
    G_reg_op = G_reg_opt.apply_updates(allow_no_op=True)
    D_reg_op = D_reg_opt.apply_updates(allow_no_op=True)
    Gs_update_op = Gs.setup_as_moving_average_of(G, beta=Gs_beta)

    # Finalize graph.
    with tf.device('/gpu:0'):
        try:
            peak_gpu_mem_op = tf.contrib.memory_stats.MaxBytesInUse()
        except tf.errors.NotFoundError:
            peak_gpu_mem_op = tf.constant(0)
    tflib.init_uninitialized_vars()

    print('Initializing logs...')
    summary_log = tf.summary.FileWriter(dnnlib.make_run_dir_path())
    if save_tf_graph:
        summary_log.add_graph(tf.get_default_graph())
    if save_weight_histograms:
        G.setup_weight_histograms()
        D.setup_weight_histograms()
    metrics = metric_base.MetricGroup(metric_arg_list)

    print('Training for %d kimg...\n' % total_kimg)
    dnnlib.RunContext.get().update('',
                                   cur_epoch=resume_kimg,
                                   max_epoch=total_kimg)
    maintenance_time = dnnlib.RunContext.get().get_last_update_interval()
    cur_nimg = int(resume_kimg * 1000)
    cur_tick = -1
    tick_start_nimg = cur_nimg
    prev_lod = -1.0
    running_mb_counter = 0

    while cur_nimg < total_kimg * 1000:
        if dnnlib.RunContext.get().should_stop(): break

        # Choose training parameters and configure training ops.
        sched = training_schedule(cur_nimg=cur_nimg,
                                  training_set=training_set,
                                  **sched_args)
        assert sched.minibatch_size % (sched.minibatch_gpu * num_gpus) == 0
        training_set.configure(sched.minibatch_gpu, sched.lod)
        if reset_opt_for_new_lod:
            if np.floor(sched.lod) != np.floor(prev_lod) or np.ceil(
                    sched.lod) != np.ceil(prev_lod):
                G_opt.reset_optimizer_state()
                D_opt.reset_optimizer_state()
        prev_lod = sched.lod

        # Run training ops
        feed_dict = {
            lod_in: sched.lod,
            lrate_in: sched.G_lrate,
            minibatch_size_in: sched.minibatch_size,
            minibatch_gpu_in: sched.minibatch_gpu
        }
        for _repeat in range(minibatch_repeats):
            rounds = range(0, sched.minibatch_size,
                           sched.minibatch_gpu * num_gpus)
            ae_iter_mul = 10
            ae_rounds = range(0, sched.minibatch_size,
                              sched.minibatch_gpu * num_gpus * ae_iter_mul)
            run_G_reg = (lazy_regularization
                         and running_mb_counter % G_reg_interval == 0)
            run_D_reg = (lazy_regularization
                         and running_mb_counter % D_reg_interval == 0)
            cur_nimg += sched.minibatch_size
            running_mb_counter += 1

            # Fast path without gradient accumulation.
            if len(rounds) == 1:
                tflib.run([G_train_op, data_fetch_op], feed_dict)
                if run_G_reg:
                    tflib.run(G_reg_op, feed_dict)
                tflib.run([D_train_op, Gs_update_op], feed_dict)
                if run_D_reg:
                    tflib.run(D_reg_op, feed_dict)

            # Slow path with gradient accumulation.
            else:
                for _round in rounds:
                    _g_loss, _ = tflib.run([G_loss, G_train_op], feed_dict)
                if run_G_reg:
                    for _round in rounds:
                        tflib.run(G_reg_op, feed_dict)
                tflib.run(Gs_update_op, feed_dict)
                for _round in rounds:
                    tflib.run(data_fetch_op, feed_dict)
                    tflib.run(D_train_op, feed_dict)
                if run_D_reg:
                    for _round in rounds:
                        tflib.run(D_reg_op, feed_dict)

        # Perform maintenance tasks once per tick.
        done = (cur_nimg >= total_kimg * 1000)
        if cur_tick < 0 or cur_nimg >= tick_start_nimg + sched.tick_kimg * 1000 or done:
            cur_tick += 1
            tick_kimg = (cur_nimg - tick_start_nimg) / 1000.0
            tick_start_nimg = cur_nimg
            tick_time = dnnlib.RunContext.get().get_time_since_last_update()
            total_time = dnnlib.RunContext.get().get_time_since_start(
            ) + resume_time

            # Report progress.
            print(
                'tick %-5d kimg %-8.1f lod %-5.2f minibatch %-4d time %-12s sec/tick %-7.1f sec/kimg %-7.2f maintenance %-6.1f gpumem %.1f'
                % (autosummary('Progress/tick', cur_tick),
                   autosummary('Progress/kimg', cur_nimg / 1000.0),
                   autosummary('Progress/lod', sched.lod),
                   autosummary('Progress/minibatch', sched.minibatch_size),
                   dnnlib.util.format_time(
                       autosummary('Timing/total_sec', total_time)),
                   autosummary('Timing/sec_per_tick', tick_time),
                   autosummary('Timing/sec_per_kimg', tick_time / tick_kimg),
                   autosummary('Timing/maintenance_sec', maintenance_time),
                   autosummary('Resources/peak_gpu_mem_gb',
                               peak_gpu_mem_op.eval() / 2**30)))
            autosummary('Timing/total_hours', total_time / (60.0 * 60.0))
            autosummary('Timing/total_days', total_time / (24.0 * 60.0 * 60.0))

            # Save snapshots.
            if image_snapshot_ticks is not None and (
                    cur_tick % image_snapshot_ticks == 0 or done):
                print('g loss', _g_loss)
                grid_fakes = Gs.run(grid_latents,
                                    grid_labels,
                                    is_validation=True,
                                    minibatch_size=sched.minibatch_gpu)
                misc.save_image_grid(grid_fakes,
                                     dnnlib.make_run_dir_path(
                                         'fakes%06d.png' % (cur_nimg // 1000)),
                                     drange=drange_net,
                                     grid_size=grid_size)

            if network_snapshot_ticks is not None and cur_tick % network_snapshot_ticks == 0 or done:
                pkl = dnnlib.make_run_dir_path('network-snapshot-%06d.pkl' %
                                               (cur_nimg // 1000))
                misc.save_pkl((G, D, Gs), pkl)
                metrics.run(pkl,
                            run_dir=dnnlib.make_run_dir_path(),
                            data_dir=dnnlib.convert_path(eval_data_dir),
                            num_gpus=num_gpus,
                            tf_config=tf_config,
                            rho=rho)
            # Update summaries and RunContext.
            metrics.update_autosummaries()
            tflib.autosummary.save_summaries(summary_log, cur_nimg)
            dnnlib.RunContext.get().update('%.2f' % sched.lod,
                                           cur_epoch=cur_nimg // 1000,
                                           max_epoch=total_kimg)
            maintenance_time = dnnlib.RunContext.get(
            ).get_last_update_interval() - tick_time

    # Save final snapshot.
    misc.save_pkl((G, D, Gs), dnnlib.make_run_dir_path('network-final.pkl'))

    # All done.
    summary_log.close()
    training_set.close()
    eval_set.close()
Ejemplo n.º 13
0
def embed(batch_size, resolution, imgs, network, iteration, result_dir, seed=6600):
    tf.reset_default_graph()
    G_args = dnnlib.EasyDict(func_name='training.networks_stylegan2_alpha.G_main')
    G_args.fmap_base = 8 << 10
    print('Loading networks from "%s"...' % network)
    tflib.init_tf()
    G = tflib.Network('G', num_channels=3, resolution=128, **G_args)
    _, _, Gs = pretrained_networks.load_networks(network)
    G.copy_vars_from(Gs)
    img_in = tf.placeholder(tf.float32)
    opt = tf.train.AdamOptimizer(learning_rate=0.01, beta1=0.9, beta2=0.999, epsilon=1e-8)
    opt_T = tf.train.AdamOptimizer(learning_rate=0.002, beta1=0.9, beta2=0.999, epsilon=1e-8)
    noise_vars = [var for name, var in G.components.synthesis.vars.items() if name.startswith('noise')]
    alpha_vars = [var for name, var in G.components.synthesis.vars.items() if name.endswith('alpha')]
    alpha_evals = [alpha.eval() for alpha in alpha_vars]

    G_kwargs = dnnlib.EasyDict()
    G_kwargs.randomize_noise = False
    G_syn = G.components.synthesis

    rnd = np.random.RandomState(seed)
    dlatent_avg = [var for name, var in G.vars.items() if name.startswith('dlatent_avg')][0].eval()
    dlatent_avg = np.expand_dims(np.expand_dims(dlatent_avg, 0), 1)
    dlatent_avg = dlatent_avg.repeat(12, 1)
    dlatent = tf.get_variable('dlatent', dtype=tf.float32, initializer=tf.constant(dlatent_avg),
                              trainable=True)
    T = tf.get_variable('T', dtype=tf.float32, initializer=tf.constant(0.95))
    alpha_pre = [scale_alpha_exp(alpha_eval, T) for alpha_eval in alpha_evals]
    synth_img = G_syn.get_output_for(dlatent, is_training=False, alpha_pre=alpha_pre, **G_kwargs)
    # synth_img = (synth_img + 1.0) / 2.0

    with tf.variable_scope('mse_loss'):
        mse_loss = tf.reduce_mean(tf.square(img_in - synth_img))
    with tf.variable_scope('perceptual_loss'):
        vgg_in = tf.concat([img_in, synth_img], 0)
        tf.keras.backend.set_image_data_format('channels_first')
        vgg = tf.keras.applications.VGG16(include_top=False, input_tensor=vgg_in, input_shape=(3, 128, 128),
                                          weights='/gdata2/fengrl/metrics/vgg.h5',
                                          pooling=None)
        h1 = vgg.get_layer('block1_conv1').output
        h2 = vgg.get_layer('block1_conv2').output
        h3 = vgg.get_layer('block3_conv2').output
        h4 = vgg.get_layer('block4_conv2').output
        pcep_loss = tf.reduce_mean(tf.square(h1[0] - h1[1])) + tf.reduce_mean(tf.square(h2[0] - h2[1])) + \
                    tf.reduce_mean(tf.square(h3[0] - h3[1])) + tf.reduce_mean(tf.square(h4[0] - h4[1]))
    loss = 0.5 * mse_loss + 0.5 * pcep_loss
    with tf.control_dependencies([loss]):
        grads = tf.gradients(mse_loss, [dlatent, T])
        train_op1 = opt.apply_gradients(zip([grads[0]], [dlatent]))
        train_op2 = opt_T.apply_gradients(zip([grads[1]], [T]))
        train_op = tf.group(train_op1, train_op2)
    reset_opt = tf.variables_initializer(opt.variables()+opt_T.variables())
    reset_dl = tf.variables_initializer([dlatent, T])

    tflib.init_uninitialized_vars()
    # rnd = np.random.RandomState(seed)
    tflib.set_vars({var: rnd.randn(*var.shape.as_list()) for var in noise_vars})  # [height, width]
    idx = 0
    metrics_l = []
    metrics_p = []
    metrics_m = []
    metrics_d = []
    T_list = []
    for img in imgs:
        img = np.expand_dims(img, 0)
        loss_list = []
        p_loss_list = []
        m_loss_list = []
        dl_list = []
        si_list = []
        # tflib.set_vars({alpha: alpha_np for alpha, alpha_np in zip(alpha_vars, alpha_evals)})
        tflib.run([reset_opt, reset_dl])
        for i in range(iteration):
            loss_, p_loss_, m_loss_, dl_, si_, t_, _ = tflib.run([loss, pcep_loss, mse_loss, dlatent, synth_img, T, train_op],
                                                             {img_in: img})
            loss_list.append(loss_)
            p_loss_list.append(p_loss_)
            m_loss_list.append(m_loss_)
            dl_loss_ = np.sum(np.square(dl_-dlatent_avg))
            dl_list.append(dl_loss_)
            if i % 500 == 0:
                si_list.append(si_)
            if i % 100 == 0:
                print('idx %d, Loss %f, mse %f, ppl %f, dl %f, t %f, step %d' % (idx, loss_, m_loss_, p_loss_, dl_loss_, t_, i))
        print('T: %f, loss: %f, ppl: %f, mse: %f, d: %f' % (t_,
                                                               loss_list[-1],
                                                               p_loss_list[-1],
                                                               m_loss_list[-1],
                                                               dl_list[-1]))
        metrics_l.append(loss_list[-1])
        metrics_p.append(p_loss_list[-1])
        metrics_m.append(m_loss_list[-1])
        metrics_d.append(dl_list[-1])
        T_list.append(t_)
        misc.save_image_grid(np.concatenate(si_list, 0), os.path.join(result_dir, 'si%d.png' % idx), drange=[-1, 1])
        misc.save_image_grid(si_list[-1], os.path.join(result_dir, 'sifinal%d.png' % idx),
                             drange=[-1, 1])
        with open(os.path.join(result_dir, 'metric_l%d.txt' % idx), 'w') as f:
            for l_ in loss_list:
                f.write(str(l_) + '\n')
        with open(os.path.join(result_dir, 'metric_p%d.txt' % idx), 'w') as f:
            for l_ in p_loss_list:
                f.write(str(l_) + '\n')
        with open(os.path.join(result_dir, 'metric_m%d.txt' % idx), 'w') as f:
            for l_ in m_loss_list:
                f.write(str(l_) + '\n')
        with open(os.path.join(result_dir, 'metric_d%d.txt' % idx), 'w') as f:
            for l_ in dl_list:
                f.write(str(l_) + '\n')
        idx += 1

    l_mean = np.mean(metrics_l)
    p_mean = np.mean(metrics_p)
    m_mean = np.mean(metrics_m)
    d_mean = np.mean(metrics_d)
    with open(os.path.join(result_dir, 'metric_lmpd.txt'), 'w') as f:
        f.write(str(alpha_evals)+'\n')
        for i in range(len(metrics_l)):
            f.write(str(T_list[i])+'    '+str(metrics_l[i])+'    '+str(metrics_m[i])+'    '+str(metrics_p[i])+'    '+str(metrics_d[i])+'\n')

    print('Overall metrics: loss_mean %f, ppl_mean %f, mse_mean %f, d_mean %f' % (l_mean, p_mean, m_mean, d_mean))
    with open(os.path.join(result_dir, 'mean_metrics.txt'), 'w') as f:
        f.write('loss %f\n' % l_mean)
        f.write('mse %f\n' % m_mean)
        f.write('ppl %f\n' % p_mean)
        f.write('dl %f\n' % d_mean)
Ejemplo n.º 14
0
def invert(model_path, _image, _wp, _latent_shape):
    print("Inverting")
    tflib.init_tf({'rnd.np_random_seed': 1000})
    with open(model_path, 'rb') as f:
        E, _, _, Gs = pickle.load(f)

    # Get input size.
    image_size = E.input_shape[2]
    assert image_size == E.input_shape[3]

    # Build graph.
    print("Inverting : Build Graph.")
    sess = tf.get_default_session()

    batch_size = 4
    input_shape = E.input_shape
    input_shape[0] = batch_size  # default batch size
    x = tf.placeholder(tf.float32, shape=input_shape, name='real_image')
    x_255 = (tf.transpose(x, [0, 2, 3, 1]) + 1) / 2 * 255

    wp = _wp
    x_rec = Gs.components.synthesis.get_output_for(wp, randomize_noise=False)
    x_rec_255 = (tf.transpose(x_rec, [0, 2, 3, 1]) + 1) / 2 * 255

    w_enc = E.get_output_for(x, phase=False)
    wp_enc = tf.reshape(w_enc, _latent_shape)
    setter = tf.assign(wp, wp_enc)

    # Settings for optimization.
    print("Inverting : Settings for Optimization.")
    perceptual_model = PerceptualModel([image_size, image_size], False)
    x_feat = perceptual_model(x_255)
    x_rec_feat = perceptual_model(x_rec_255)
    loss_feat = tf.reduce_mean(tf.square(x_feat - x_rec_feat), axis=[1])
    loss_pix = tf.reduce_mean(tf.square(x - x_rec), axis=[1, 2, 3])
    w_enc_new = E.get_output_for(x_rec, phase=False)
    wp_enc_new = tf.reshape(w_enc_new, _latent_shape)
    loss_enc = tf.reduce_mean(tf.square(wp - wp_enc_new), axis=[1, 2])
    loss = (loss_pix + 5e-5 * loss_feat + 2.0 * loss_enc)
    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    train_op = optimizer.minimize(loss, var_list=[wp])
    tflib.init_uninitialized_vars()

    # Invert image
    print("Start Inverting.")
    num_iterations = 40
    num_results = 2
    save_interval = num_iterations // num_results

    context_images = np.zeros(input_shape, np.uint8)

    context_image = resize_image(load_image(_image), (image_size, image_size))

    # Inverting Context Image.
    context_images[0] = np.transpose(context_image, [2, 0, 1])
    context_input = context_images.astype(np.float32) / 255 * 2.0 - 1.0

    sess.run([setter], {x: context_input})
    context_output = sess.run([wp, x_rec])
    context_output[1] = adjust_pixel_range(context_output[1])
    context_image = np.transpose(context_images[0], [1, 2, 0])

    for step in tqdm(range(1, num_iterations + 1), leave=False):
        sess.run(train_op, {x: context_input})
        if step == num_iterations or step % save_interval == 0:
            context_output = sess.run([wp, x_rec])
            context_output[1] = adjust_pixel_range(context_output[1])
            if step == num_iterations: context_image = context_output[1][0]

    return context_image
Ejemplo n.º 15
0
def training_loop_hd(
    I_args={},  # Options for generator network.
    M_args={},  # Options for discriminator network.
    I_info_args={},  # Options for class network.
    I_opt_args={},  # Options for generator optimizer.
    I_loss_args={},  # Options for generator loss.
    resume_G_pkl=None,  # G network pickle to help training.
    dataset_args={},  # Options for dataset.load_dataset().
    sched_args={},  # Options for train.TrainingSchedule.
    grid_args={},  # Options for train.setup_snapshot_image_grid().
    metric_arg_list=[],  # Options for MetricGroup.
    tf_config={},  # Options for tflib.init_tf().
    data_dir=None,  # Directory to load datasets from.
    I_smoothing_kimg=10.0,  # Half-life of the running average of generator weights.
    minibatch_repeats=4,  # Number of minibatches to run before adjusting training parameters.
    lazy_regularization=True,  # Perform regularization as a separate training step?
    I_reg_interval=4,  # How often the perform regularization for G? Ignored if lazy_regularization=False.
    reset_opt_for_new_lod=True,  # Reset optimizer internal state (e.g. Adam moments) when new layers are introduced?
    total_kimg=25000,  # Total length of the training, measured in thousands of real images.
    mirror_augment=False,  # Enable mirror augment?
    drange_net=[
        -1, 1
    ],  # Dynamic range used when feeding image data to the networks.
    image_snapshot_ticks=50,  # How often to save image snapshots? None = only save 'reals.png' and 'fakes-init.png'.
    network_snapshot_ticks=50,  # How often to save network snapshots? None = only save 'networks-final.pkl'.
    save_tf_graph=False,  # Include full TensorFlow computation graph in the tfevents file?
    save_weight_histograms=False,  # Include weight histograms in the tfevents file?
    resume_pkl=None,  # Network pickle to resume training from, None = train from scratch.
    resume_kimg=0.0,  # Assumed training progress at the beginning. Affects reporting and training schedule.
    resume_time=0.0,  # Assumed wallclock time at the beginning. Affects reporting.
    resume_with_new_nets=False,  # Construct new networks according to I_args and M_args before resuming training?
    traversal_grid=False,  # Used for disentangled representation learning.
    n_discrete=0,  # Number of discrete latents in model.
    n_continuous=10,  # Number of continuous latents in model.
    n_samples_per=4,  # Number of samples for each line in traversal.
    use_hd_with_cls=False,  # If use info_loss.
    resolution_manual=1024,  # Resolution of generated images.
    use_level_training=False,  # If use level training (hierarchical optimization strategy).
    level_I_kimg=1000,  # Number of kimg of tick for I_level training.
    use_std_in_m=False,  # If output prior std in M net.
    prior_latent_size=512,  # Prior latent size.
    use_hyperplane=False,  # If use hyperplane model.
    pretrained_type='with_stylegan2'):  # Pretrained type for G.

    # Initialize dnnlib and TensorFlow.
    tflib.init_tf(tf_config)
    num_gpus = dnnlib.submit_config.num_gpus

    # Load training set.
    training_set = dataset.load_dataset(data_dir=dnnlib.convert_path(data_dir),
                                        verbose=True,
                                        **dataset_args)
    grid_size, grid_reals, grid_labels = misc.setup_snapshot_image_grid(
        training_set, **grid_args)
    misc.save_image_grid(grid_reals,
                         dnnlib.make_run_dir_path('reals.png'),
                         drange=training_set.dynamic_range,
                         grid_size=grid_size)

    # Construct or load networks.
    with tf.device('/gpu:0'):
        if resume_pkl is None or resume_with_new_nets:
            print('Constructing networks...')
            I = tflib.Network('I',
                              num_channels=training_set.shape[0],
                              resolution=resolution_manual,
                              label_size=training_set.label_size,
                              **I_args)
            M = tflib.Network('M',
                              num_channels=training_set.shape[0],
                              resolution=resolution_manual,
                              label_size=training_set.label_size,
                              **M_args)
            Is = I.clone('Is')
            if use_hd_with_cls:
                I_info = tflib.Network('I_info',
                                       num_channels=training_set.shape[0],
                                       resolution=resolution_manual,
                                       label_size=training_set.label_size,
                                       **I_info_args)
        if resume_pkl is not None:
            print('Loading networks from "%s"...' % resume_pkl)
            if use_hd_with_cls:
                rI, rM, rIs, rI_info = misc.load_pkl(resume_pkl)
            else:
                rI, rM, rIs = misc.load_pkl(resume_pkl)
            if resume_with_new_nets:
                I.copy_vars_from(rI)
                M.copy_vars_from(rM)
                Is.copy_vars_from(rIs)
                if use_hd_with_cls:
                    I_info.copy_vars_from(rI_info)
            else:
                I = rI
                M = rM
                Is = rIs
                if use_hd_with_cls:
                    I_info = rI_info

        print('Loading generator from "%s"...' % resume_G_pkl)
        if pretrained_type == 'with_stylegan2':
            rG, rD, rGs = misc.load_pkl(resume_G_pkl)
            G = rG
            D = rD
            Gs = rGs
        elif pretrained_type == 'with_cascadeVAE':
            rG = misc.load_pkl(resume_G_pkl)
            G = rG
            Gs = rG

    # Print layers and generate initial image snapshot.
    I.print_layers()
    M.print_layers()
    # pdb.set_trace()
    training_set_resolution_log2 = int(np.log2(resolution_manual))
    sched = training_schedule(
        cur_nimg=total_kimg * 1000,
        training_set_resolution_log2=training_set_resolution_log2,
        **sched_args)

    if not use_hyperplane:
        grid_size, grid_latents, grid_labels = get_grid_latents(
            n_discrete, n_continuous, n_samples_per, G, grid_labels)
        print('grid_size:', grid_size)
        print('grid_latents.shape:', grid_latents.shape)
        print('grid_labels.shape:', grid_labels.shape)
        if resolution_manual >= 256:
            grid_size = (grid_size[0], grid_size[1] // 5)
            grid_latents = grid_latents[:grid_latents.shape[0] // 5]
            grid_labels = grid_labels[:grid_labels.shape[0] // 5]
        prior_traj_latents = M.run(grid_latents,
                                   is_validation=True,
                                   minibatch_size=sched.minibatch_gpu)
        if use_std_in_m:
            prior_traj_latents = prior_traj_latents[:, :prior_latent_size]
    else:
        grid_size = (n_samples_per, n_continuous)
        grid_labels = np.tile(grid_labels[:1],
                              (n_continuous * n_samples_per, 1))
        latent_dirs = get_latent_dirs(n_continuous)
        prior_traj_latents = get_prior_traj_by_dirs(latent_dirs, M,
                                                    n_samples_per,
                                                    prior_latent_size,
                                                    grid_labels, sched)
        if resolution_manual >= 256:
            grid_size = (grid_size[0], grid_size[1] // 5)
            prior_traj_latents = prior_traj_latents[:prior_traj_latents.
                                                    shape[0] // 5]
            grid_labels = grid_labels[:grid_labels.shape[0] // 5]
    print('prior_traj_latents.shape:', prior_traj_latents.shape)
    # pdb.set_trace()
    prior_traj_latents_show = np.reshape(
        prior_traj_latents, [-1, n_samples_per, prior_latent_size])
    print_traj(prior_traj_latents_show)
    grid_fakes = Gs.run(prior_traj_latents,
                        grid_labels,
                        is_validation=True,
                        minibatch_size=sched.minibatch_gpu,
                        randomize_noise=True,
                        normalize_latents=False)
    grid_fakes = add_outline(grid_fakes, width=1)
    misc.save_image_grid(grid_fakes,
                         dnnlib.make_run_dir_path('fakes_init.png'),
                         drange=drange_net,
                         grid_size=grid_size)
    if (n_continuous == 2) and (n_discrete == 0):
        n_per_line = 20
        ex_latent_value = 3
        prior_grid_latents, prior_grid_labels = get_2d_grid_latents(
            low=-ex_latent_value,
            high=ex_latent_value,
            n_per_line=n_per_line,
            grid_labels=grid_labels)
        grid_showing_fakes = Gs.run(prior_grid_latents,
                                    prior_grid_labels,
                                    is_validation=True,
                                    minibatch_size=sched.minibatch_gpu,
                                    randomize_noise=True,
                                    normalize_latents=False)
        grid_showing_fakes = add_outline(grid_showing_fakes, width=1)
        misc.save_image_grid(
            grid_showing_fakes,
            dnnlib.make_run_dir_path('fakes_init_2d_prior_grid.png'),
            drange=drange_net,
            grid_size=[n_per_line, n_per_line])
        img_to_draw = Image.open(
            dnnlib.make_run_dir_path('fakes_init_2d_prior_grid.png'))
        img_to_draw = img_to_draw.convert('RGB')
        img_to_draw = draw_traj_on_prior_grid(img_to_draw,
                                              prior_traj_latents_show,
                                              ex_latent_value, n_per_line)
        img_to_draw.save(
            dnnlib.make_run_dir_path('fakes_init_2d_prior_grid_drawn.png'))

    if use_level_training:
        ending_level = training_set_resolution_log2 - 1
    else:
        ending_level = 1

    # Setup training inputs.
    print('Building TensorFlow graph...')
    with tf.name_scope('Inputs'), tf.device('/cpu:0'):
        lod_in = tf.placeholder(tf.float32, name='lod_in', shape=[])
        lrate_in = tf.placeholder(tf.float32, name='lrate_in', shape=[])
        minibatch_size_in = tf.placeholder(tf.int32,
                                           name='minibatch_size_in',
                                           shape=[])
        minibatch_gpu_in = tf.placeholder(tf.int32,
                                          name='minibatch_gpu_in',
                                          shape=[])
        minibatch_multiplier = minibatch_size_in // (minibatch_gpu_in *
                                                     num_gpus)
        Is_beta = 0.5**tf.div(tf.cast(minibatch_size_in,
                                      tf.float32), I_smoothing_kimg *
                              1000.0) if I_smoothing_kimg > 0.0 else 0.0

    # Setup optimizers.
    I_opt_args = dict(I_opt_args)
    for args, reg_interval in [(I_opt_args, I_reg_interval)]:
        args['minibatch_multiplier'] = minibatch_multiplier
        args['learning_rate'] = lrate_in
        if lazy_regularization:
            mb_ratio = reg_interval / (reg_interval + 1)
            args['learning_rate'] *= mb_ratio
            if 'beta1' in args: args['beta1'] **= mb_ratio
            if 'beta2' in args: args['beta2'] **= mb_ratio

    I_opts = []
    I_reg_opts = []
    for n_level in range(ending_level):
        I_opts.append(tflib.Optimizer(name='TrainI_%d' % n_level,
                                      **I_opt_args))
        I_reg_opts.append(
            tflib.Optimizer(name='RegI_%d' % n_level,
                            share=I_opts[-1],
                            **I_opt_args))

    # Build training graph for each GPU.
    for gpu in range(num_gpus):
        with tf.name_scope('GPU%d' % gpu), tf.device('/gpu:%d' % gpu):

            # Create GPU-specific shadow copies of I and M.
            I_gpu = I if gpu == 0 else I.clone(I.name + '_shadow')
            M_gpu = M if gpu == 0 else M.clone(M.name + '_shadow')
            G_gpu = G if gpu == 0 else G.clone(G.name + '_shadow')
            if use_hd_with_cls:
                I_info_gpu = I_info if gpu == 0 else I_info.clone(I_info.name +
                                                                  '_shadow')

            # Evaluate loss functions.
            lod_assign_ops = []
            I_losses = []
            I_regs = []
            if 'lod' in I_gpu.vars:
                lod_assign_ops += [tf.assign(I_gpu.vars['lod'], lod_in)]
            if 'lod' in M_gpu.vars:
                lod_assign_ops += [tf.assign(M_gpu.vars['lod'], lod_in)]
            for n_level in range(ending_level):
                with tf.control_dependencies(lod_assign_ops):
                    with tf.name_scope('I_loss_%d' % n_level):
                        if use_hd_with_cls:
                            I_loss, I_reg = dnnlib.util.call_func_by_name(
                                I=I_gpu,
                                M=M_gpu,
                                G=G_gpu,
                                I_info=I_info_gpu,
                                opt=I_opts[n_level],
                                training_set=training_set,
                                minibatch_size=minibatch_gpu_in,
                                **I_loss_args)
                        else:
                            I_loss, I_reg = dnnlib.util.call_func_by_name(
                                I=I_gpu,
                                M=M_gpu,
                                G=G_gpu,
                                opt=I_opts[n_level],
                                n_levels=(n_level +
                                          1) if use_level_training else None,
                                training_set=training_set,
                                minibatch_size=minibatch_gpu_in,
                                **I_loss_args)
                        I_losses.append(I_loss)
                        I_regs.append(I_reg)

                # Register gradients.
                if not lazy_regularization:
                    if I_regs[n_level] is not None:
                        I_losses[n_level] += I_regs[n_level]
                else:
                    if I_regs[n_level] is not None:
                        I_reg_opts[n_level].register_gradients(
                            tf.reduce_mean(I_regs[n_level] * I_reg_interval),
                            I_gpu.trainables)

                if use_hd_with_cls:
                    MIIinfo_gpu_trainables = collections.OrderedDict(
                        list(M_gpu.trainables.items()) +
                        list(I_gpu.trainables.items()) +
                        list(I_info_gpu.trainables.items()))
                    I_opts[n_level].register_gradients(
                        tf.reduce_mean(I_losses[n_level]),
                        MIIinfo_gpu_trainables)
                else:
                    MI_gpu_trainables = collections.OrderedDict(
                        list(M_gpu.trainables.items()) +
                        list(I_gpu.trainables.items()))
                    I_opts[n_level].register_gradients(
                        tf.reduce_mean(I_losses[n_level]), MI_gpu_trainables)

    # Setup training ops.
    I_train_ops = []
    I_reg_ops = []
    for n_level in range(ending_level):
        I_train_ops.append(I_opts[n_level].apply_updates())
        I_reg_ops.append(I_reg_opts[n_level].apply_updates(allow_no_op=True))
    Is_update_op = Is.setup_as_moving_average_of(I, beta=Is_beta)

    # Finalize graph.
    with tf.device('/gpu:0'):
        try:
            peak_gpu_mem_op = tf.contrib.memory_stats.MaxBytesInUse()
        except tf.errors.NotFoundError:
            peak_gpu_mem_op = tf.constant(0)
    tflib.init_uninitialized_vars()

    print('Initializing logs...')
    summary_log = tf.summary.FileWriter(dnnlib.make_run_dir_path())
    if save_tf_graph:
        summary_log.add_graph(tf.get_default_graph())
    if save_weight_histograms:
        I.setup_weight_histograms()
        M.setup_weight_histograms()
    metrics = metric_base.MetricGroup(metric_arg_list)

    print('Training for %d kimg...\n' % total_kimg)
    dnnlib.RunContext.get().update('',
                                   cur_epoch=resume_kimg,
                                   max_epoch=total_kimg)
    maintenance_time = dnnlib.RunContext.get().get_last_update_interval()
    cur_nimg = int(resume_kimg * 1000)
    cur_tick = -1
    tick_start_nimg = cur_nimg
    prev_lod = -1.0
    running_mb_counter = 0
    while cur_nimg < total_kimg * 1000:
        if dnnlib.RunContext.get().should_stop(): break

        n_level = 0 if not use_level_training else min(
            cur_nimg // (level_I_kimg * 1000), training_set_resolution_log2 -
            2)
        # Choose training parameters and configure training ops.
        sched = training_schedule(
            cur_nimg=cur_nimg,
            training_set_resolution_log2=training_set_resolution_log2,
            **sched_args)
        assert sched.minibatch_size % (sched.minibatch_gpu * num_gpus) == 0
        training_set.configure(sched.minibatch_gpu, sched.lod)
        if reset_opt_for_new_lod:
            if np.floor(sched.lod) != np.floor(prev_lod) or np.ceil(
                    sched.lod) != np.ceil(prev_lod):
                I_opts[n_level].reset_optimizer_state()
        prev_lod = sched.lod

        # Run training ops.
        feed_dict = {
            lod_in: sched.lod,
            lrate_in: sched.I_lrate,
            minibatch_size_in: sched.minibatch_size,
            minibatch_gpu_in: sched.minibatch_gpu
        }
        for _repeat in range(minibatch_repeats):
            rounds = range(0, sched.minibatch_size,
                           sched.minibatch_gpu * num_gpus)
            run_I_reg = (lazy_regularization
                         and running_mb_counter % I_reg_interval == 0)
            cur_nimg += sched.minibatch_size
            running_mb_counter += 1

            # Fast path without gradient accumulation.
            if len(rounds) == 1:
                tflib.run(I_train_ops[n_level], feed_dict)
                if run_I_reg:
                    tflib.run(I_reg_ops[n_level], feed_dict)
                tflib.run([Is_update_op], feed_dict)

            # Slow path with gradient accumulation.
            else:
                for _round in rounds:
                    tflib.run(I_train_ops[n_level], feed_dict)
                if run_I_reg:
                    for _round in rounds:
                        tflib.run(I_reg_ops[n_level], feed_dict)
                tflib.run(Is_update_op, feed_dict)

        # Perform maintenance tasks once per tick.
        done = (cur_nimg >= total_kimg * 1000)
        if cur_tick < 0 or cur_nimg >= tick_start_nimg + sched.tick_kimg * 100 or done:
            cur_tick += 1
            tick_kimg = (cur_nimg - tick_start_nimg) / 1000.0
            tick_start_nimg = cur_nimg
            tick_time = dnnlib.RunContext.get().get_time_since_last_update()
            total_time = dnnlib.RunContext.get().get_time_since_start(
            ) + resume_time

            # Report progress.
            print(
                'tick %-5d kimg %-8.1f lod %-5.2f minibatch %-4d time %-12s sec/tick %-7.1f sec/kimg %-7.2f maintenance %-6.1f gpumem %.1f'
                % (autosummary('Progress/tick', cur_tick),
                   autosummary('Progress/kimg', cur_nimg / 1000.0),
                   autosummary('Progress/lod', sched.lod),
                   autosummary('Progress/minibatch', sched.minibatch_size),
                   dnnlib.util.format_time(
                       autosummary('Timing/total_sec', total_time)),
                   autosummary('Timing/sec_per_tick', tick_time),
                   autosummary('Timing/sec_per_kimg', tick_time / tick_kimg),
                   autosummary('Timing/maintenance_sec', maintenance_time),
                   autosummary('Resources/peak_gpu_mem_gb',
                               peak_gpu_mem_op.eval() / 2**30)))
            autosummary('Timing/total_hours', total_time / (60.0 * 60.0))
            autosummary('Timing/total_days', total_time / (24.0 * 60.0 * 60.0))

            # Save snapshots.
            if image_snapshot_ticks is not None and (
                    cur_tick % image_snapshot_ticks == 0 or done):

                if not use_hyperplane:
                    prior_traj_latents = M.run(
                        grid_latents,
                        is_validation=True,
                        minibatch_size=sched.minibatch_gpu)
                    if use_std_in_m:
                        prior_traj_latents = prior_traj_latents[:, :
                                                                prior_latent_size]
                else:
                    prior_traj_latents = get_prior_traj_by_dirs(
                        latent_dirs, M, n_samples_per, prior_latent_size,
                        grid_labels, sched)
                    if resolution_manual >= 256:
                        prior_traj_latents = prior_traj_latents[:
                                                                prior_traj_latents
                                                                .shape[0] // 5]

                prior_traj_latents_show = np.reshape(
                    prior_traj_latents, [-1, n_samples_per, prior_latent_size])
                print_traj(prior_traj_latents_show)
                grid_fakes = Gs.run(prior_traj_latents,
                                    grid_labels,
                                    is_validation=True,
                                    minibatch_size=sched.minibatch_gpu,
                                    randomize_noise=True,
                                    normalize_latents=False)
                grid_fakes = add_outline(grid_fakes, width=1)
                misc.save_image_grid(grid_fakes,
                                     dnnlib.make_run_dir_path(
                                         'fakes%06d.png' % (cur_nimg // 1000)),
                                     drange=drange_net,
                                     grid_size=grid_size)
                if (n_continuous == 2) and (n_discrete == 0):
                    n_per_line = 20
                    ex_latent_value = 3
                    prior_grid_latents, prior_grid_labels = get_2d_grid_latents(
                        low=-ex_latent_value,
                        high=ex_latent_value,
                        n_per_line=n_per_line,
                        grid_labels=grid_labels)
                    grid_showing_fakes = Gs.run(
                        prior_grid_latents,
                        prior_grid_labels,
                        is_validation=True,
                        minibatch_size=sched.minibatch_gpu,
                        randomize_noise=True,
                        normalize_latents=False)
                    grid_showing_fakes = add_outline(grid_showing_fakes,
                                                     width=1)
                    misc.save_image_grid(grid_showing_fakes,
                                         dnnlib.make_run_dir_path(
                                             'fakes_2d_prior_grid%06d.png' %
                                             (cur_nimg // 1000)),
                                         drange=drange_net,
                                         grid_size=[n_per_line, n_per_line])
                    img_to_draw = Image.open(
                        dnnlib.make_run_dir_path(
                            'fakes_2d_prior_grid%06d.png' %
                            (cur_nimg // 1000)))
                    img_to_draw = img_to_draw.convert('RGB')
                    img_to_draw = draw_traj_on_prior_grid(
                        img_to_draw, prior_traj_latents_show, ex_latent_value,
                        n_per_line)
                    img_to_draw.save(
                        dnnlib.make_run_dir_path(
                            'fakes_2d_prior_grid_drawn%06d.png' %
                            (cur_nimg // 1000)))

            if network_snapshot_ticks is not None and (
                    cur_tick % network_snapshot_ticks == 0 or done):
                pkl = dnnlib.make_run_dir_path('network-snapshot-%06d.pkl' %
                                               (cur_nimg // 1000))
                misc.save_pkl((I, M, Is), pkl)
                metrics.run(pkl,
                            run_dir=dnnlib.make_run_dir_path(),
                            data_dir=dnnlib.convert_path(data_dir),
                            num_gpus=num_gpus,
                            tf_config=tf_config)

            # Update summaries and RunContext.
            metrics.update_autosummaries()
            tflib.autosummary.save_summaries(summary_log, cur_nimg)
            dnnlib.RunContext.get().update('%.2f' % sched.lod,
                                           cur_epoch=cur_nimg // 1000,
                                           max_epoch=total_kimg)
            maintenance_time = dnnlib.RunContext.get(
            ).get_last_update_interval() - tick_time

    # Save final snapshot.
    misc.save_pkl((I, M, Is), dnnlib.make_run_dir_path('network-final.pkl'))

    # All done.
    summary_log.close()
    training_set.close()
Ejemplo n.º 16
0
def training_loop(
    classifier_args={},  # Options for generator network.
    classifier_opt_args={},  # Options for generator optimizer.
    classifier_loss_args={},
    dataset_args={},  # Options for dataset.load_dataset().
    sched_args={},  # Options for train.TrainingSchedule.
    metric_arg_list=[],  # Options for MetricGroup.
    tf_config={},  # Options for tflib.init_tf().
    data_dir=None,  # Directory to load datasets from.
    minibatch_repeats=4,  # Number of minibatches to run before adjusting training parameters.
    total_kimg=25000,  # Total length of the training, measured in thousands of real images.
    mirror_augment=False,  # Enable mirror augment?
    drange_net=[
        -1, 1
    ],  # Dynamic range used when feeding image data to the networks.
    network_snapshot_ticks=5,  # How often to save network snapshots? None = only save 'networks-final.pkl'.
    save_tf_graph=False):

    # Initialize dnnlib and TensorFlow.
    tflib.init_tf(tf_config)
    num_gpus = dnnlib.submit_config.num_gpus

    # Load training set.
    training_set = dataset.load_dataset(data_dir=dnnlib.convert_path(data_dir),
                                        verbose=True,
                                        shuffle_mb=2 * 4096,
                                        **dataset_args)

    # Construct or load networks.
    with tf.device('/gpu:0'):
        print('Constructing networks...')
        classifier = tflib.Network('classifier',
                                   num_channels=training_set.shape[0],
                                   resolution=training_set.shape[1],
                                   label_size=training_set.label_size,
                                   **classifier_args)

    classifier.print_layers()

    # Setup training inputs.
    print('Building TensorFlow graph...')
    with tf.name_scope('Inputs'), tf.device('/cpu:0'):
        lrate_in = tf.placeholder(tf.float32, name='lrate_in', shape=[])
        minibatch_size_in = tf.placeholder(tf.int32,
                                           name='minibatch_size_in',
                                           shape=[])
        minibatch_gpu_in = tf.placeholder(tf.int32,
                                          name='minibatch_gpu_in',
                                          shape=[])
        minibatch_multiplier = minibatch_size_in // (minibatch_gpu_in *
                                                     num_gpus)

    # Setup optimizers.
    classifier_opt_args = dict(classifier_opt_args)

    classifier_opt_args['minibatch_multiplier'] = minibatch_multiplier
    classifier_opt_args['learning_rate'] = lrate_in

    classifier_opt = tflib.Optimizer(name='TrainClassifier',
                                     **classifier_opt_args)

    # Build training graph for each GPU.
    data_fetch_ops = []
    for gpu in range(num_gpus):
        with tf.name_scope('gpu%d' % gpu), tf.device('/gpu:%d' % gpu):

            # Create GPU-specific shadow copies of G and D.
            classifier_gpu = classifier if gpu == 0 else classifier.clone(
                classifier.name + '_shadow')

            # Fetch training data via temporary variables.
            with tf.name_scope('DataFetch'):
                sched = training_schedule(cur_nimg=0, **sched_args)
                reals_var = tf.Variable(
                    name='reals',
                    trainable=False,
                    initial_value=tf.zeros([sched.minibatch_gpu] +
                                           training_set.shape))
                labels_var = tf.Variable(name='labels',
                                         trainable=False,
                                         initial_value=tf.zeros(
                                             [sched.minibatch_gpu, 127]))
                reals_write, labels_write = training_set.get_minibatch_tf()
                reals_write, labels_write = process_reals(
                    reals_write, labels_write, mirror_augment,
                    training_set.dynamic_range, drange_net)
                reals_write = tf.concat(
                    [reals_write, reals_var[minibatch_gpu_in:]], axis=0)
                labels_write = tf.concat(
                    [labels_write, labels_var[minibatch_gpu_in:]], axis=0)
                data_fetch_ops += [tf.assign(reals_var, reals_write)]
                data_fetch_ops += [tf.assign(labels_var, labels_write)]
                reals_read = reals_var[:minibatch_gpu_in]
                labels_read = labels_var[:minibatch_gpu_in]

            # Evaluate loss functions.
            with tf.name_scope('classifier_loss'):
                classifier_loss, label = dnnlib.util.call_func_by_name(
                    classifier=classifier_gpu,
                    images=reals_read,
                    labels=labels_read,
                    **classifier_loss_args)

            classifier_opt.register_gradients(tf.reduce_mean(classifier_loss),
                                              classifier_gpu.trainables)

    # Setup training ops.
    data_fetch_op = tf.group(*data_fetch_ops)
    classifier_train_op = classifier_opt.apply_updates()

    # Finalize graph.
    with tf.device('/gpu:0'):
        try:
            peak_gpu_mem_op = tf.contrib.memory_stats.MaxBytesInUse()
        except tf.errors.NotFoundError:
            peak_gpu_mem_op = tf.constant(0)
    tflib.init_uninitialized_vars()

    print('Initializing logs...')
    summary_log = tf.summary.FileWriter(dnnlib.make_run_dir_path())
    if save_tf_graph:
        summary_log.add_graph(tf.get_default_graph())

    metrics = metric_base.MetricGroup(metric_arg_list)

    print('Training for %d kimg...\n' % total_kimg)
    dnnlib.RunContext.get().update('', cur_epoch=0, max_epoch=total_kimg)
    maintenance_time = dnnlib.RunContext.get().get_last_update_interval()
    cur_nimg = 0
    cur_tick = -1
    tick_start_nimg = cur_nimg
    running_mb_counter = 0
    while cur_nimg < total_kimg * 1000:
        if dnnlib.RunContext.get().should_stop(): break

        # Choose training parameters and configure training ops.
        sched = training_schedule(cur_nimg=cur_nimg, **sched_args)
        assert sched.minibatch_size % (sched.minibatch_gpu * num_gpus) == 0
        training_set.configure(sched.minibatch_gpu)

        # Run training ops.
        feed_dict = {
            lrate_in: sched.G_lrate,
            minibatch_size_in: sched.minibatch_size,
            minibatch_gpu_in: sched.minibatch_gpu
        }
        for _repeat in range(minibatch_repeats):
            rounds = range(0, sched.minibatch_size,
                           sched.minibatch_gpu * num_gpus)
            cur_nimg += sched.minibatch_size
            running_mb_counter += 1

            # Fast path without gradient accumulation.
            if len(rounds) == 1:
                tflib.run([classifier_train_op, data_fetch_op], feed_dict)

            # Slow path with gradient accumulation.
            else:
                for _round in rounds:
                    tflib.run(data_fetch_op, feed_dict)
                    classifier_loss_out, label_out, _ = tflib.run(
                        [classifier_loss, label, classifier_train_op],
                        feed_dict)
                    print_output = False
                    if print_output:
                        print('label')
                        print(np.round(label_out, 2))
                        print('loss')
                        print(np.round(classifier_loss_out, 2))
        # Perform maintenance tasks once per tick.
        done = (cur_nimg >= total_kimg * 1000)
        if cur_tick < 0 or cur_nimg >= tick_start_nimg + sched.tick_kimg * 1000 or done:
            cur_tick += 1
            tick_kimg = (cur_nimg - tick_start_nimg) / 1000.0
            tick_start_nimg = cur_nimg
            tick_time = dnnlib.RunContext.get().get_time_since_last_update()
            total_time = dnnlib.RunContext.get().get_time_since_start()

            # Report progress.
            print(
                'tick %-5d kimg %-8.1f minibatch %-4d time %-12s sec/tick %-7.1f sec/kimg %-7.2f maintenance %-6.1f gpumem %.1f'
                % (autosummary('Progress/tick', cur_tick),
                   autosummary('Progress/kimg', cur_nimg / 1000.0),
                   autosummary('Progress/minibatch', sched.minibatch_size),
                   dnnlib.util.format_time(
                       autosummary('Timing/total_sec', total_time)),
                   autosummary('Timing/sec_per_tick', tick_time),
                   autosummary('Timing/sec_per_kimg', tick_time / tick_kimg),
                   autosummary('Timing/maintenance_sec', maintenance_time),
                   autosummary('Resources/peak_gpu_mem_gb',
                               peak_gpu_mem_op.eval() / 2**30)))
            autosummary('Timing/total_hours', total_time / (60.0 * 60.0))
            autosummary('Timing/total_days', total_time / (24.0 * 60.0 * 60.0))

            # Save snapshots.
            if network_snapshot_ticks is not None and (
                    cur_tick % network_snapshot_ticks == 0 or done):
                pkl = dnnlib.make_run_dir_path('network-snapshot-%06d.pkl' %
                                               (cur_nimg // 1000))
                misc.save_pkl(classifier, pkl)
                metrics.run(pkl,
                            run_dir=dnnlib.make_run_dir_path(),
                            data_dir=dnnlib.convert_path(data_dir),
                            num_gpus=num_gpus,
                            tf_config=tf_config)

            # Update summaries and RunContext.
            metrics.update_autosummaries()
            tflib.autosummary.save_summaries(summary_log, cur_nimg)
            dnnlib.RunContext.get().update('%.2f' % 0,
                                           cur_epoch=cur_nimg // 1000,
                                           max_epoch=total_kimg)
            maintenance_time = dnnlib.RunContext.get(
            ).get_last_update_interval() - tick_time

    # Save final snapshot.
    misc.save_pkl(classifier, dnnlib.make_run_dir_path('network-final.pkl'))

    # All done.
    summary_log.close()
    training_set.close()
Ejemplo n.º 17
0
def encode(_target_image, _context_image, _output_dir):
    gpu_id = '0'
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu_id
    print(_target_image)
    assert os.path.exists('./static/' + _target_image)
    _output_dir = _output_dir[:-4]
    output_dir = './static/' + _output_dir

    tflib.init_tf({'rnd.np_random_seed': 1000})
    model_path = './styleganinv_face_256.pkl'
    with open(model_path, 'rb') as f:
        E, _, _, Gs = pickle.load(f)

    # Get input size.
    image_size = E.input_shape[2]
    assert image_size == E.input_shape[3]

    crop_size = 110  # default crop size.
    center_x = 125
    center_y = 145
    crop_x = center_x - crop_size // 2  # default coordinate-X
    crop_y = center_y - crop_size // 2  # default coordinate-Y

    mask = np.zeros((1, image_size, image_size, 3), dtype=np.float32)
    mask[:, crop_y:crop_y + crop_size, crop_x:crop_x + crop_size, :] = 1.0

    # Build graph.
    sess = tf.get_default_session()

    batch_size = 4
    input_shape = E.input_shape
    input_shape[0] = batch_size  # default batch size
    x = tf.placeholder(tf.float32, shape=input_shape, name='real_image')
    x_mask = (tf.transpose(x, [0, 2, 3, 1]) + 1) * mask - 1
    x_mask_255 = (x_mask + 1) / 2 * 255

    latent_shape = Gs.components.synthesis.input_shape
    latent_shape[0] = batch_size  # default batch size
    wp = tf.get_variable(shape=latent_shape, name='latent_code')
    x_rec = Gs.components.synthesis.get_output_for(wp, randomize_noise=False)
    x_rec_mask = (tf.transpose(x_rec, [0, 2, 3, 1]) + 1) * mask - 1
    x_rec_mask_255 = (x_rec_mask + 1) / 2 * 255

    w_enc = E.get_output_for(x, phase=False)
    wp_enc = tf.reshape(w_enc, latent_shape)
    setter = tf.assign(wp, wp_enc)

    # Settings for optimization.
    print("Diffusion : Settings for Optimization.")
    perceptual_model = PerceptualModel([image_size, image_size], False)
    x_feat = perceptual_model(x_mask_255)
    x_rec_feat = perceptual_model(x_rec_mask_255)
    loss_feat = tf.reduce_mean(tf.square(x_feat - x_rec_feat), axis=[1])
    loss_pix = tf.reduce_mean(tf.square(x_mask - x_rec_mask), axis=[1, 2, 3])

    loss_weight_feat = 5e-5
    learning_rate = 0.01
    loss = loss_pix + loss_weight_feat * loss_feat  # default The perceptual loss scale for optimization. (default 5e-5)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss, var_list=[wp])
    tflib.init_uninitialized_vars()

    # Invert image
    num_iterations = 100
    num_results = 5
    save_interval = num_iterations // num_results

    images = np.zeros(input_shape, np.uint8)

    print("Load target image.")
    _target_image = './static/' + _target_image
    target_image = resize_image(load_image(_target_image),
                                (image_size, image_size))
    save_image('./' + output_dir + '_tar.png', target_image)

    print("Load context image.")
    context_image = getContextImage(_context_image)
    context_image = resize_image(load_image(context_image),
                                 (image_size, image_size))
    save_image('./' + output_dir + '_cont.png', context_image)

    # Inverting Context Image.
    # context_image = invert(model_path, getContextImage(_context_image), wp, latent_shape)
    save_image('./' + output_dir + '_cont_inv.png', context_image)

    # Create Stitched Image
    # context_image[crop_y:crop_y + crop_size, crop_x:crop_x + crop_size] = (
    #     target_image[crop_y:crop_y + crop_size, crop_x:crop_x + crop_size]
    # )
    # context_image[crop_y:crop_y + 170, crop_x - 70:crop_x + crop_size + 190] = (
    #     target_image[crop_y:crop_y + 170, crop_x - 70:crop_x + crop_size + 190]
    # )
    print("Cropping Image...")
    # context_image = cropImage(target_image, context_image)

    target_image, rect = cropWithWhite(target_image)
    target_image = fourChannels(target_image)
    target_image = cut(target_image)
    target_image = transBg(target_image)

    context_image = createStitchedImage(context_image, target_image, rect)
    save_image('./' + output_dir + '_sti.png', context_image)
    images[0] = np.transpose(context_image, [2, 0, 1])

    input = images.astype(np.float32) / 255 * 2.0 - 1.0

    # Run encoder
    print("Start Diffusion.")
    sess.run([setter], {x: input})
    output = sess.run([wp, x_rec])
    output[1] = adjust_pixel_range(output[1])

    col_idx = 4
    for step in tqdm(range(1, num_iterations + 1), leave=False):
        sess.run(train_op, {x: input})
        if step == num_iterations or step % save_interval == 0:
            output = sess.run([wp, x_rec])
            output[1] = adjust_pixel_range(output[1])
            if step == num_iterations:
                save_image(f'{output_dir}.png', output[1][0])
            col_idx += 1
    exit()
Ejemplo n.º 18
0
def main():
    """Main function."""
    args = parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
    assert os.path.exists(args.target_list)
    target_list_name = os.path.splitext(os.path.basename(args.target_list))[0]
    assert os.path.exists(args.context_list)
    context_list_name = os.path.splitext(os.path.basename(
        args.context_list))[0]
    output_dir = args.output_dir or f'results/diffusion'
    job_name = f'{target_list_name}_TO_{context_list_name}'
    logger = setup_logger(output_dir, f'{job_name}.log', f'{job_name}_logger')

    logger.info(f'Loading model.')
    tflib.init_tf({'rnd.np_random_seed': 1000})
    with open(args.model_path, 'rb') as f:
        E, _, _, Gs = pickle.load(f)

    # Get input size.
    image_size = E.input_shape[2]
    assert image_size == E.input_shape[3]
    crop_size = args.crop_size
    crop_x = args.center_x - crop_size // 2
    crop_y = args.center_y - crop_size // 2
    mask = np.zeros((1, image_size, image_size, 3), dtype=np.float32)
    mask[:, crop_y:crop_y + crop_size, crop_x:crop_x + crop_size, :] = 1.0

    # Build graph.
    logger.info(f'Building graph.')
    sess = tf.get_default_session()
    input_shape = E.input_shape
    input_shape[0] = args.batch_size
    x = tf.placeholder(tf.float32, shape=input_shape, name='real_image')
    x_mask = (tf.transpose(x, [0, 2, 3, 1]) + 1) * mask - 1
    x_mask_255 = (x_mask + 1) / 2 * 255
    latent_shape = Gs.components.synthesis.input_shape
    latent_shape[0] = args.batch_size
    wp = tf.get_variable(shape=latent_shape, name='latent_code')
    x_rec = Gs.components.synthesis.get_output_for(wp, randomize_noise=False)
    x_rec_mask = (tf.transpose(x_rec, [0, 2, 3, 1]) + 1) * mask - 1
    x_rec_mask_255 = (x_rec_mask + 1) / 2 * 255

    w_enc = E.get_output_for(x, phase=False)
    wp_enc = tf.reshape(w_enc, latent_shape)
    setter = tf.assign(wp, wp_enc)

    # Settings for optimization.
    logger.info(f'Setting configuration for optimization.')
    perceptual_model = PerceptualModel([image_size, image_size], False)
    x_feat = perceptual_model(x_mask_255)
    x_rec_feat = perceptual_model(x_rec_mask_255)
    loss_feat = tf.reduce_mean(tf.square(x_feat - x_rec_feat), axis=[1])
    loss_pix = tf.reduce_mean(tf.square(x_mask - x_rec_mask), axis=[1, 2, 3])

    loss = loss_pix + args.loss_weight_feat * loss_feat
    optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    train_op = optimizer.minimize(loss, var_list=[wp])
    tflib.init_uninitialized_vars()

    # Load image list.
    logger.info(f'Loading target images and context images.')
    target_list = []
    with open(args.target_list, 'r') as f:
        for line in f:
            target_list.append(line.strip())
    num_targets = len(target_list)
    context_list = []
    with open(args.context_list, 'r') as f:
        for line in f:
            context_list.append(line.strip())
    num_contexts = len(context_list)
    num_pairs = num_targets * num_contexts

    # Invert images.
    logger.info(f'Start diffusion.')
    save_interval = args.num_iterations // args.num_results
    headers = [
        'Target Image', 'Context Image', 'Stitched Image', 'Encoder Output'
    ]
    for step in range(1, args.num_iterations + 1):
        if step == args.num_iterations or step % save_interval == 0:
            headers.append(f'Step {step:06d}')
    viz_size = None if args.viz_size == 0 else args.viz_size
    visualizer = HtmlPageVisualizer(num_rows=num_pairs,
                                    num_cols=len(headers),
                                    viz_size=viz_size)
    visualizer.set_headers(headers)

    images = np.zeros(input_shape, np.uint8)
    latent_codes_enc = []
    latent_codes = []
    for target_idx in tqdm(range(num_targets), desc='Target ID', leave=False):
        # Load target.
        target_image = resize_image(load_image(target_list[target_idx]),
                                    (image_size, image_size))
        visualizer.set_cell(target_idx * num_contexts, 0, image=target_image)
        for context_idx in tqdm(range(0, num_contexts, args.batch_size),
                                desc='Context ID',
                                leave=False):
            row_idx = target_idx * num_contexts + context_idx
            batch = context_list[context_idx:context_idx + args.batch_size]
            for i, context_image_path in enumerate(batch):
                context_image = resize_image(load_image(context_image_path),
                                             (image_size, image_size))
                visualizer.set_cell(row_idx + i, 1, image=context_image)
                context_image[crop_y:crop_y + crop_size, crop_x:crop_x +
                              crop_size] = (target_image[crop_y:crop_y +
                                                         crop_size,
                                                         crop_x:crop_x +
                                                         crop_size])
                visualizer.set_cell(row_idx + i, 2, image=context_image)
                images[i] = np.transpose(context_image, [2, 0, 1])
            inputs = images.astype(np.float32) / 255 * 2.0 - 1.0
            # Run encoder.
            sess.run([setter], {x: inputs})
            outputs = sess.run([wp, x_rec])
            latent_codes_enc.append(outputs[0][0:len(batch)])
            outputs[1] = adjust_pixel_range(outputs[1])
            for i, _ in enumerate(batch):
                visualizer.set_cell(row_idx + i, 3, image=outputs[1][i])
            # Optimize latent codes.
            col_idx = 4
            for step in tqdm(range(1, args.num_iterations + 1), leave=False):
                sess.run(train_op, {x: inputs})
                if step == args.num_iterations or step % save_interval == 0:
                    outputs = sess.run([wp, x_rec])
                    outputs[1] = adjust_pixel_range(outputs[1])
                    for i, _ in enumerate(batch):
                        visualizer.set_cell(row_idx + i,
                                            col_idx,
                                            image=outputs[1][i])
                    col_idx += 1
            latent_codes.append(outputs[0][0:len(batch)])

    # Save results.
    code_shape = [num_targets, num_contexts] + list(latent_shape[1:])
    np.save(f'{output_dir}/{job_name}_encoded_codes.npy',
            np.concatenate(latent_codes_enc, axis=0).reshape(code_shape))
    np.save(f'{output_dir}/{job_name}_inverted_codes.npy',
            np.concatenate(latent_codes, axis=0).reshape(code_shape))
    visualizer.save(f'{output_dir}/{job_name}.html')
Ejemplo n.º 19
0
def training_loop(
    # Configurations
    cG={},
    cD={},  # Generator and Discriminator command-line arguments
    dataset_args={},  # dataset.load_dataset() options
    sched_args={},  # train.TrainingSchedule options
    vis_args={},  # vis.eval options
    grid_args={},  # train.setup_snapshot_img_grid() options
    metric_arg_list=[],  # MetricGroup Options
    tf_config={},  # tflib.init_tf() options
    eval=False,  # Evaluation mode
    train=False,  # Training mode
    # Data
    data_dir=None,  # Directory to load datasets from
    total_kimg=25000,  # Total length of the training, measured in thousands of real images
    mirror_augment=False,  # Enable mirror augmentation?
    drange_net=[
        -1, 1
    ],  # Dynamic range used when feeding image data to the networks
    ratio=1.0,  # Image height/width ratio in the dataset
    # Optimization
    minibatch_repeats=4,  # Number of minibatches to run before adjusting training parameters
    lazy_regularization=True,  # Perform regularization as a separate training step?
    smoothing_kimg=10.0,  # Half-life of the running average of generator weights
    clip=None,  # Clip gradients threshold
    # Resumption
    resume_pkl=None,  # Network pickle to resume training from, None = train from scratch.
    resume_kimg=0.0,  # Assumed training progress at the beginning
    # Affects reporting and training schedule
    resume_time=0.0,  # Assumed wallclock time at the beginning, affects reporting
    recompile=False,  # Recompile network from source code (otherwise loads from snapshot)
    # Logging
    summarize=True,  # Create TensorBoard summaries
    save_tf_graph=False,  # Include full TensorFlow computation graph in the tfevents file?
    save_weight_histograms=False,  # Include weight histograms in the tfevents file?
    img_snapshot_ticks=3,  # How often to save image snapshots? None = disable
    network_snapshot_ticks=3,  # How often to save network snapshots? None = only save networks-final.pkl
    last_snapshots=10,  # Maximal number of prior snapshots to save
    eval_images_num=50000,  # Sample size for the metrics
    printname="",  # Experiment name for logging
    # Architecture
    merge=False):  # Generate several images and then merge them

    # Initialize dnnlib and TensorFlow
    tflib.init_tf(tf_config)
    num_gpus = dnnlib.submit_config.num_gpus
    cG.name, cD.name = "g", "d"

    # Load dataset, configure training scheduler and metrics object
    dataset = data.load_dataset(data_dir=dnnlib.convert_path(data_dir),
                                verbose=True,
                                **dataset_args)
    sched = training_schedule(sched_args,
                              cur_nimg=total_kimg * 1000,
                              dataset=dataset)
    metrics = metric_base.MetricGroup(metric_arg_list)

    # Construct or load networks
    with tf.device("/gpu:0"):
        no_op = tf.no_op()
        G, D, Gs = None, None, None
        if resume_pkl is None or recompile:
            misc.log("Constructing networks...", "white")
            G = tflib.Network("G",
                              num_channels=dataset.shape[0],
                              resolution=dataset.shape[1],
                              label_size=dataset.label_size,
                              **cG.args)
            D = tflib.Network("D",
                              num_channels=dataset.shape[0],
                              resolution=dataset.shape[1],
                              label_size=dataset.label_size,
                              **cD.args)
            Gs = G.clone("Gs")
        if resume_pkl is not None:
            G, D, Gs = load_nets(resume_pkl, G, D, Gs, recompile)

    G.print_layers()
    D.print_layers()

    # Train/Evaluate/Visualize
    # Labels are optional but not essential
    grid_size, grid_reals, grid_labels = misc.setup_snapshot_img_grid(
        dataset, **grid_args)
    misc.save_img_grid(grid_reals,
                       dnnlib.make_run_dir_path("reals.png"),
                       drange=dataset.dynamic_range,
                       grid_size=grid_size)
    grid_latents = np.random.randn(np.prod(grid_size), *G.input_shape[1:])

    if eval:
        # Save a snapshot of the current network to evaluate
        pkl = dnnlib.make_run_dir_path("network-eval-snapshot-%06d.pkl" %
                                       resume_kimg)
        misc.save_pkl((G, D, Gs), pkl, remove=False)

        # Quantitative evaluation
        metric = metrics.run(pkl,
                             num_imgs=eval_images_num,
                             run_dir=dnnlib.make_run_dir_path(),
                             data_dir=dnnlib.convert_path(data_dir),
                             num_gpus=num_gpus,
                             ratio=ratio,
                             tf_config=tf_config,
                             mirror_augment=mirror_augment)

        # Qualitative evaluation
        visualize.eval(G,
                       dataset,
                       batch_size=sched.minibatch_gpu,
                       drange_net=drange_net,
                       ratio=ratio,
                       **vis_args)

    if not train:
        dataset.close()
        exit()

    # Setup training inputs
    misc.log("Building TensorFlow graph...", "white")
    with tf.name_scope("Inputs"), tf.device("/cpu:0"):
        lrate_in_g = tf.placeholder(tf.float32, name="lrate_in_g", shape=[])
        lrate_in_d = tf.placeholder(tf.float32, name="lrate_in_d", shape=[])
        step = tf.placeholder(tf.int32, name="step", shape=[])
        minibatch_size_in = tf.placeholder(tf.int32,
                                           name="minibatch_size_in",
                                           shape=[])
        minibatch_gpu_in = tf.placeholder(tf.int32,
                                          name="minibatch_gpu_in",
                                          shape=[])
        minibatch_multiplier = minibatch_size_in // (minibatch_gpu_in *
                                                     num_gpus)
        beta = 0.5**tf.div(tf.cast(minibatch_size_in,
                                   tf.float32), smoothing_kimg *
                           1000.0) if smoothing_kimg > 0.0 else 0.0

    # Set optimizers
    for cN, lr in [(cG, lrate_in_g), (cD, lrate_in_d)]:
        set_optimizer(cN, lr, minibatch_multiplier, lazy_regularization, clip)

    # Build training graph for each GPU
    data_fetch_ops = []
    for gpu in range(num_gpus):
        with tf.name_scope("GPU%d" % gpu), tf.device("/gpu:%d" % gpu):

            # Create GPU-specific shadow copies of G and D
            for cN, N in [(cG, G), (cD, D)]:
                cN.gpu = N if gpu == 0 else N.clone(N.name + "_shadow")
            Gs_gpu = Gs if gpu == 0 else Gs.clone(Gs.name + "_shadow")

            # Fetch training data via temporary variables
            with tf.name_scope("DataFetch"):
                reals, labels = dataset.get_minibatch_tf()
                reals = process_reals(reals, dataset.dynamic_range, drange_net,
                                      mirror_augment)
                reals, reals_fetch = read_data(
                    reals, "reals", [sched.minibatch_gpu] + dataset.shape,
                    minibatch_gpu_in)
                labels, labels_fetch = read_data(
                    labels, "labels",
                    [sched.minibatch_gpu, dataset.label_size],
                    minibatch_gpu_in)
                data_fetch_ops += [reals_fetch, labels_fetch]

            # Evaluate loss functions
            with tf.name_scope("G_loss"):
                cG.loss, cG.reg = dnnlib.util.call_func_by_name(
                    G=cG.gpu,
                    D=cD.gpu,
                    dataset=dataset,
                    reals=reals,
                    minibatch_size=minibatch_gpu_in,
                    **cG.loss_args)

            with tf.name_scope("D_loss"):
                cD.loss, cD.reg = dnnlib.util.call_func_by_name(
                    G=cG.gpu,
                    D=cD.gpu,
                    dataset=dataset,
                    reals=reals,
                    labels=labels,
                    minibatch_size=minibatch_gpu_in,
                    **cD.loss_args)

            for cN in [cG, cD]:
                set_optimizer_ops(cN, lazy_regularization, no_op)

    # Setup training ops
    data_fetch_op = tf.group(*data_fetch_ops)
    for cN in [cG, cD]:
        cN.train_op = cN.opt.apply_updates()
        cN.reg_op = cN.reg_opt.apply_updates(allow_no_op=True)
    Gs_update_op = Gs.setup_as_moving_average_of(G, beta=beta)

    # Finalize graph
    with tf.device("/gpu:0"):
        try:
            peak_gpu_mem_op = tf.contrib.memory_stats.MaxBytesInUse()
        except tf.errors.NotFoundError:
            peak_gpu_mem_op = tf.constant(0)
    tflib.init_uninitialized_vars()

    # Tensorboard summaries
    if summarize:
        misc.log("Initializing logs...", "white")
        summary_log = tf.summary.FileWriter(dnnlib.make_run_dir_path())
        if save_tf_graph:
            summary_log.add_graph(tf.get_default_graph())
        if save_weight_histograms:
            G.setup_weight_histograms()
            D.setup_weight_histograms()

    # Initialize training
    misc.log("Training for %d kimg..." % total_kimg, "white")
    dnnlib.RunContext.get().update("",
                                   cur_epoch=resume_kimg,
                                   max_epoch=total_kimg)
    maintenance_time = dnnlib.RunContext.get().get_last_update_interval()

    cur_tick, running_mb_counter = -1, 0
    cur_nimg = int(resume_kimg * 1000)
    tick_start_nimg = cur_nimg
    for cN in [cG, cD]:
        cN.lossvals_agg = {
            k: None
            for k in ["loss", "reg", "norm", "reg_norm"]
        }
        cN.opt.reset_optimizer_state()

    # Training loop
    while cur_nimg < total_kimg * 1000:
        if dnnlib.RunContext.get().should_stop():
            break

        # Choose training parameters and configure training ops
        sched = training_schedule(sched_args,
                                  cur_nimg=cur_nimg,
                                  dataset=dataset)
        assert sched.minibatch_size % (sched.minibatch_gpu * num_gpus) == 0
        dataset.configure(sched.minibatch_gpu)

        # Run training ops
        feed_dict = {
            lrate_in_g: sched.G_lrate,
            lrate_in_d: sched.D_lrate,
            minibatch_size_in: sched.minibatch_size,
            minibatch_gpu_in: sched.minibatch_gpu,
            step: sched.kimg
        }

        # Several iterations before updating training parameters
        for _repeat in range(minibatch_repeats):
            rounds = range(0, sched.minibatch_size,
                           sched.minibatch_gpu * num_gpus)
            for cN in [cG, cD]:
                cN.run_reg = lazy_regularization and (running_mb_counter %
                                                      cN.reg_interval == 0)
            cur_nimg += sched.minibatch_size
            running_mb_counter += 1

            for cN in [cG, cD]:
                cN.lossvals = {
                    k: None
                    for k in ["loss", "reg", "norm", "reg_norm"]
                }

            # Gradient accumulation
            for _round in rounds:
                cG.lossvals.update(
                    tflib.run([cG.train_op, cG.ops], feed_dict)[1])
                if cG.run_reg:
                    _, cG.lossvals["reg_norm"] = tflib.run(
                        [cG.reg_op, cG.reg_norm], feed_dict)

                tflib.run(data_fetch_op, feed_dict)

                cD.lossvals.update(
                    tflib.run([cD.train_op, cD.ops], feed_dict)[1])
                if cD.run_reg:
                    _, cD.lossvals["reg_norm"] = tflib.run(
                        [cD.reg_op, cD.reg_norm], feed_dict)

            tflib.run([Gs_update_op], feed_dict)

            # Track loss statistics
            for cN in [cG, cD]:
                for k in cN.lossvals_agg:
                    cN.lossvals_agg[k] = emaAvg(cN.lossvals_agg[k],
                                                cN.lossvals[k])

        # Perform maintenance tasks once per tick
        done = (cur_nimg >= total_kimg * 1000)
        if cur_tick < 0 or cur_nimg >= tick_start_nimg + sched.tick_kimg * 1000 or done:
            cur_tick += 1
            tick_kimg = (cur_nimg - tick_start_nimg) / 1000.0
            tick_start_nimg = cur_nimg
            tick_time = dnnlib.RunContext.get().get_time_since_last_update()
            total_time = dnnlib.RunContext.get().get_time_since_start(
            ) + resume_time

            # Report progress
            print(
                ("tick %s kimg %s   loss/reg: G (%s %s) D (%s %s)   grad norms: G (%s %s) D (%s %s)   "
                 + "time %s sec/kimg %s maxGPU %sGB %s") %
                (misc.bold("%-5d" % autosummary("Progress/tick", cur_tick)),
                 misc.bcolored(
                     "{:>8.1f}".format(
                         autosummary("Progress/kimg", cur_nimg / 1000.0)),
                     "red"),
                 misc.bcolored("{:>6.3f}".format(cG.lossvals_agg["loss"] or 0),
                               "blue"),
                 misc.bold("{:>6.3f}".format(cG.lossvals_agg["reg"] or 0)),
                 misc.bcolored("{:>6.3f}".format(cD.lossvals_agg["loss"] or 0),
                               "blue"),
                 misc.bold("{:>6.3f}".format(cD.lossvals_agg["reg"] or 0)),
                 misc.cond_bcolored(cG.lossvals_agg["norm"], 20.0, "red"),
                 misc.cond_bcolored(cG.lossvals_agg["reg_norm"], 20.0, "red"),
                 misc.cond_bcolored(cD.lossvals_agg["norm"], 20.0, "red"),
                 misc.cond_bcolored(cD.lossvals_agg["reg_norm"], 20.0, "red"),
                 misc.bold("%-10s" % dnnlib.util.format_time(
                     autosummary("Timing/total_sec", total_time))),
                 "{:>7.2f}".format(
                     autosummary("Timing/sec_per_kimg",
                                 tick_time / tick_kimg)),
                 "{:>4.1f}".format(
                     autosummary("Resources/peak_gpu_mem_gb",
                                 peak_gpu_mem_op.eval() / 2**30)), printname))

            autosummary("Timing/total_hours", total_time / (60.0 * 60.0))
            autosummary("Timing/total_days", total_time / (24.0 * 60.0 * 60.0))

            # Save snapshots
            if img_snapshot_ticks is not None and (
                    cur_tick % img_snapshot_ticks == 0 or done):
                visualize.eval(G,
                               dataset,
                               batch_size=sched.minibatch_gpu,
                               training=True,
                               step=cur_nimg // 1000,
                               grid_size=grid_size,
                               latents=grid_latents,
                               labels=grid_labels,
                               drange_net=drange_net,
                               ratio=ratio,
                               **vis_args)

            if network_snapshot_ticks is not None and (
                    cur_tick % network_snapshot_ticks == 0 or done):
                pkl = dnnlib.make_run_dir_path("network-snapshot-%06d.pkl" %
                                               (cur_nimg // 1000))
                misc.save_pkl((G, D, Gs), pkl, remove=False)

                if cur_tick % network_snapshot_ticks == 0 or done:
                    metric = metrics.run(
                        pkl,
                        num_imgs=eval_images_num,
                        run_dir=dnnlib.make_run_dir_path(),
                        data_dir=dnnlib.convert_path(data_dir),
                        num_gpus=num_gpus,
                        ratio=ratio,
                        tf_config=tf_config,
                        mirror_augment=mirror_augment)

                if last_snapshots > 0:
                    misc.rm(
                        sorted(
                            glob.glob(dnnlib.make_run_dir_path(
                                "network*.pkl")))[:-last_snapshots])

            # Update summaries and RunContext
            if summarize:
                metrics.update_autosummaries()
                tflib.autosummary.save_summaries(summary_log, cur_nimg)

            dnnlib.RunContext.get().update(None,
                                           cur_epoch=cur_nimg // 1000,
                                           max_epoch=total_kimg)
            maintenance_time = dnnlib.RunContext.get(
            ).get_last_update_interval() - tick_time

    # Save final snapshot
    misc.save_pkl((G, D, Gs),
                  dnnlib.make_run_dir_path("network-final.pkl"),
                  remove=False)

    # All done
    if summarize:
        summary_log.close()
    dataset.close()
Ejemplo n.º 20
0
def embed(batch_size,
          resolution,
          imgs,
          network,
          iteration,
          result_dir,
          seed=6600):
    tf.reset_default_graph()
    print('Loading networks from "%s"...' % network)
    tflib.init_tf()
    _, _, G = pretrained_networks.load_networks(network)
    img_in = tf.placeholder(tf.float32)
    step = tf.get_variable('step',
                           dtype=tf.float32,
                           initializer=tf.constant(0.0))
    lr = tf.train.exponential_decay(0.01,
                                    global_step=step,
                                    decay_steps=3500,
                                    decay_rate=0.5)
    opt = tf.train.AdamOptimizer(learning_rate=0.01,
                                 beta1=0.9,
                                 beta2=0.999,
                                 epsilon=1e-8)
    noise_vars = [
        var for name, var in G.components.synthesis.vars.items()
        if name.startswith('noise')
    ]

    G_kwargs = dnnlib.EasyDict()
    G_kwargs.randomize_noise = False
    G_syn = G.components.synthesis

    rnd = np.random.RandomState(seed)
    dlatent_avg = [
        var for name, var in G.vars.items() if name.startswith('dlatent_avg')
    ][0].eval()
    dlatent_avg = np.expand_dims(np.expand_dims(dlatent_avg, 0), 1)
    dlatent_avg = dlatent_avg.repeat(12, 1)
    dlatent = tf.get_variable('dlatent',
                              dtype=tf.float32,
                              initializer=tf.constant(dlatent_avg),
                              trainable=True)
    synth_img = G_syn.get_output_for(dlatent, is_training=False, **G_kwargs)
    # synth_img = (synth_img + 1.0) / 2.0

    with tf.variable_scope('mse_loss'):
        mse_loss = tf.reduce_mean(tf.square(img_in - synth_img))
    with tf.variable_scope('perceptual_loss'):
        vgg_in = tf.concat([img_in, synth_img], 0)
        tf.keras.backend.set_image_data_format('channels_first')
        vgg = tf.keras.applications.VGG16(
            include_top=False,
            input_tensor=vgg_in,
            input_shape=(3, 128, 128),
            weights='/gdata2/fengrl/metrics/vgg.h5',
            pooling=None)
        h1 = vgg.get_layer('block1_conv1').output
        h2 = vgg.get_layer('block1_conv2').output
        h3 = vgg.get_layer('block3_conv2').output
        h4 = vgg.get_layer('block4_conv2').output
        pcep_loss = tf.reduce_mean(tf.square(h1[0] - h1[1])) + tf.reduce_mean(tf.square(h2[0] - h2[1])) + \
                    tf.reduce_mean(tf.square(h3[0] - h3[1])) + tf.reduce_mean(tf.square(h4[0] - h4[1]))
    loss = 0.5 * mse_loss + 0.5 * pcep_loss
    with tf.control_dependencies([loss, tf.assign_add(step, 1.0)]):
        train_op = opt.minimize(mse_loss, var_list=[dlatent])
    reset_opt = tf.variables_initializer(opt.variables())
    reset_dl = tf.variables_initializer([dlatent, step])

    tflib.init_uninitialized_vars()
    # rnd = np.random.RandomState(seed)
    tflib.set_vars(
        {var: rnd.randn(*var.shape.as_list())
         for var in noise_vars})  # [height, width]
    idx = 0
    metrics_l = []
    metrics_p = []
    metrics_m = []
    metrics_d = []
    for img in imgs:
        img = np.expand_dims(img, 0)
        loss_list = []
        p_loss_list = []
        m_loss_list = []
        dl_list = []
        si_list = []
        tflib.run([reset_opt, reset_dl])
        print(lr.eval())
        for i in range(iteration):
            loss_, p_loss_, m_loss_, dl_, si_, _ = tflib.run(
                [loss, pcep_loss, mse_loss, dlatent, synth_img, train_op],
                {img_in: img})
            loss_list.append(loss_)
            p_loss_list.append(p_loss_)
            m_loss_list.append(m_loss_)
            dl_loss_ = np.sum(np.square(dl_ - dlatent_avg))
            dl_list.append(dl_loss_)
            if i % 500 == 0:
                si_list.append(si_)
            if i % 100 == 0:
                print('idx %d, Loss %f, mse %f, ppl %f, dl %f, step %d' %
                      (idx, loss_, m_loss_, p_loss_, dl_loss_, i))
        print('loss: %f, ppl: %f, mse: %f, d: %f' %
              (loss_list[-1], p_loss_list[-1], m_loss_list[-1], dl_list[-1]))
        metrics_l.append(loss_list[-1])
        metrics_p.append(p_loss_list[-1])
        metrics_m.append(m_loss_list[-1])
        metrics_d.append(dl_list[-1])
        misc.save_image_grid(np.concatenate(si_list, 0),
                             os.path.join(result_dir, 'si%d.png' % idx),
                             drange=[-1, 1])
        misc.save_image_grid(si_list[-1],
                             os.path.join(result_dir, 'sifinal%d.png' % idx),
                             drange=[-1, 1])
        with open(os.path.join(result_dir, 'metric_l%d.txt' % idx), 'w') as f:
            for l_ in loss_list:
                f.write(str(l_) + '\n')
        with open(os.path.join(result_dir, 'metric_p%d.txt' % idx), 'w') as f:
            for l_ in p_loss_list:
                f.write(str(l_) + '\n')
        with open(os.path.join(result_dir, 'metric_m%d.txt' % idx), 'w') as f:
            for l_ in m_loss_list:
                f.write(str(l_) + '\n')
        with open(os.path.join(result_dir, 'metric_d%d.txt' % idx), 'w') as f:
            for l_ in dl_list:
                f.write(str(l_) + '\n')
        idx += 1

    l_mean = np.mean(metrics_l)
    p_mean = np.mean(metrics_p)
    m_mean = np.mean(metrics_m)
    d_mean = np.mean(metrics_d)
    with open(os.path.join(result_dir, 'metric_lmpd.txt'), 'w') as f:
        for i in range(len(metrics_l)):
            f.write(
                str(metrics_l[i]) + '    ' + str(metrics_m[i]) + '    ' +
                str(metrics_p[i]) + '    ' + str(metrics_d[i]) + '\n')

    print(
        'Overall metrics: loss_mean %f, ppl_mean %f, mse_mean %f, d_mean %f' %
        (l_mean, p_mean, m_mean, d_mean))
    with open(os.path.join(result_dir, 'mean_metrics.txt'), 'w') as f:
        f.write('loss %f\n' % l_mean)
        f.write('mse %f\n' % m_mean)
        f.write('ppl %f\n' % p_mean)
        f.write('dl %f\n' % d_mean)
Ejemplo n.º 21
0
def training_loop_infernet(
    I_args={},  # Options for infogan-head/vcgan-head network.
    I_opt_args={},  # Options for discriminator optimizer.
    loss_args={},  # Options for discriminator loss.
    sched_args={},  # Options for train.TrainingSchedule.
    grid_args={},  # Options for train.setup_snapshot_image_grid().
    metric_arg_list=[],  # Options for MetricGroup.
    tf_config={},  # Options for tflib.init_tf().
    minibatch_repeats=4,  # Number of minibatches to run before adjusting training parameters.
    lazy_regularization=True,  # Perform regularization as a separate training step?
    total_kimg=25000,  # Total length of the training, measured in thousands of real images.
    mirror_augment=False,  # Enable mirror augment?
    drange_net=[
        -1, 1
    ],  # Dynamic range used when feeding image data to the networks.
    image_snapshot_ticks=50,  # How often to save image snapshots? None = only save 'reals.png' and 'fakes-init.png'.
    network_snapshot_ticks=5,  # How often to save network snapshots? None = only save 'networks-final.pkl'.
    save_tf_graph=False,  # Include full TensorFlow computation graph in the tfevents file?
    save_weight_histograms=False,  # Include weight histograms in the tfevents file?
    G_pkl=None,  # The G to load.
    resume_pkl=None,  # Network pickle to resume training from, None = train from scratch.
    resume_kimg=0.0,  # Assumed training progress at the beginning. Affects reporting and training schedule.
    resume_time=0.0,  # Assumed wallclock time at the beginning. Affects reporting.
    resume_with_new_nets=False,  # Construct new networks according to G_args and D_args before resuming training?
    n_samples_per=10):  # Number of samples for each line in traversal.

    # Initialize dnnlib and TensorFlow.
    tflib.init_tf(tf_config)
    num_gpus = dnnlib.submit_config.num_gpus

    # Construct or load networks.
    with tf.device('/gpu:0'):
        G, D, I, Gs = misc.load_pkl(G_pkl)
        print('Gs.output_shapes:', Gs.output_shapes)
        if resume_pkl is None or resume_with_new_nets:
            print('Constructing networks...')
            I = tflib.Network('I',
                              num_channels=Gs.output_shapes[0][1],
                              resolution=Gs.output_shapes[0][2],
                              **I_args)
        if resume_pkl is not None:
            print('Loading networks from "%s"...' % resume_pkl)
            rI, rGs = misc.load_pkl(resume_pkl)
            if resume_with_new_nets:
                I.copy_vars_from(rI)
                Gs.copy_vars_from(rGs)
            else:
                I = rI
                Gs = rGs

    # Print layers and generate initial image snapshot.
    Gs.print_layers()
    I.print_layers()

    # Setup training inputs.
    print('Building TensorFlow graph...')
    with tf.name_scope('Inputs'), tf.device('/cpu:0'):
        lrate_in = tf.placeholder(tf.float32, name='lrate_in', shape=[])
        minibatch_size_in = tf.placeholder(tf.int32,
                                           name='minibatch_size_in',
                                           shape=[])
        minibatch_gpu_in = tf.placeholder(tf.int32,
                                          name='minibatch_gpu_in',
                                          shape=[])
        minibatch_multiplier = minibatch_size_in // (minibatch_gpu_in *
                                                     num_gpus)

    # Setup optimizers.
    I_opt_args = dict(I_opt_args)
    I_opt_args['minibatch_multiplier'] = minibatch_multiplier
    I_opt_args['learning_rate'] = lrate_in
    I_opt = tflib.Optimizer(name='TrainI', **I_opt_args)

    # Build training graph for each GPU.
    data_fetch_ops = []
    for gpu in range(num_gpus):
        with tf.name_scope('GPU%d' % gpu), tf.device('/gpu:%d' % gpu):

            # Create GPU-specific shadow copies of G and D.
            I_gpu = I if gpu == 0 else I.clone(I.name + '_shadow')
            G_gpu = Gs if gpu == 0 else Gs.clone(Gs.name + '_shadow')

            # Evaluate loss functions.
            with tf.name_scope('I_loss'):
                loss, reg = dnnlib.util.call_func_by_name(
                    G=G_gpu,
                    I=I_gpu,
                    opt=I_opt,
                    minibatch_size=minibatch_gpu_in,
                    **loss_args)

            if reg is not None: loss += reg

            # Register gradients.
            I_opt.register_gradients(tf.reduce_mean(loss), I_gpu.trainables)

    # Setup training ops.
    I_train_op = I_opt.apply_updates()

    # Finalize graph.
    with tf.device('/gpu:0'):
        try:
            peak_gpu_mem_op = tf.contrib.memory_stats.MaxBytesInUse()
        except tf.errors.NotFoundError:
            peak_gpu_mem_op = tf.constant(0)
    tflib.init_uninitialized_vars()

    print('Initializing logs...')
    summary_log = tf.summary.FileWriter(dnnlib.make_run_dir_path())
    if save_tf_graph:
        summary_log.add_graph(tf.get_default_graph())
    if save_weight_histograms:
        I.setup_weight_histograms()
    metrics = metric_base.MetricGroup(metric_arg_list)

    print('Training for %d kimg...\n' % total_kimg)
    dnnlib.RunContext.get().update('',
                                   cur_epoch=resume_kimg,
                                   max_epoch=total_kimg)
    maintenance_time = dnnlib.RunContext.get().get_last_update_interval()
    cur_nimg = int(resume_kimg * 1000)
    cur_tick = -1
    tick_start_nimg = cur_nimg
    prev_lod = -1.0
    running_mb_counter = 0
    while cur_nimg < total_kimg * 1000:
        if dnnlib.RunContext.get().should_stop(): break

        # Choose training parameters and configure training ops.
        assert sched_args.minibatch_size % (sched_args.minibatch_gpu *
                                            num_gpus) == 0

        # Run training ops.
        feed_dict = {
            lrate_in: sched_args.lrate,
            minibatch_size_in: sched_args.minibatch_size,
            minibatch_gpu_in: sched_args.minibatch_gpu
        }
        for _repeat in range(minibatch_repeats):
            rounds = range(0, sched_args.minibatch_size,
                           sched_args.minibatch_gpu * num_gpus)
            cur_nimg += sched_args.minibatch_size
            running_mb_counter += 1

            # Fast path without gradient accumulation.
            if len(rounds) == 1:
                tflib.run([I_train_op], feed_dict)
            # Slow path with gradient accumulation.
            else:
                for _round in rounds:
                    tflib.run(I_train_op, feed_dict)

        # Perform maintenance tasks once per tick.
        done = (cur_nimg >= total_kimg * 1000)
        if cur_tick < 0 or cur_nimg >= tick_start_nimg + sched_args.tick_kimg * 1000 or done:
            cur_tick += 1
            tick_kimg = (cur_nimg - tick_start_nimg) / 1000.0
            tick_start_nimg = cur_nimg
            tick_time = dnnlib.RunContext.get().get_time_since_last_update()
            total_time = dnnlib.RunContext.get().get_time_since_start(
            ) + resume_time

            # Report progress.
            print(
                'tick %-5d kimg %-8.1f minibatch %-4d time %-12s sec/tick %-7.1f sec/kimg %-7.2f maintenance %-6.1f gpumem %.1f'
                %
                (autosummary('Progress/tick', cur_tick),
                 autosummary('Progress/kimg', cur_nimg / 1000.0),
                 autosummary('Progress/minibatch', sched_args.minibatch_size),
                 dnnlib.util.format_time(
                     autosummary('Timing/total_sec', total_time)),
                 autosummary('Timing/sec_per_tick', tick_time),
                 autosummary('Timing/sec_per_kimg', tick_time / tick_kimg),
                 autosummary('Timing/maintenance_sec', maintenance_time),
                 autosummary('Resources/peak_gpu_mem_gb',
                             peak_gpu_mem_op.eval() / 2**30)))
            autosummary('Timing/total_hours', total_time / (60.0 * 60.0))
            autosummary('Timing/total_days', total_time / (24.0 * 60.0 * 60.0))

            # Save snapshots.
            if network_snapshot_ticks is not None and (
                    cur_tick % network_snapshot_ticks == 0 or done):
                pkl = dnnlib.make_run_dir_path('network-snapshot-%06d.pkl' %
                                               (cur_nimg // 1000))
                misc.save_pkl((I, G), pkl)
                metrics.run(pkl,
                            run_dir=dnnlib.make_run_dir_path(),
                            num_gpus=num_gpus,
                            tf_config=tf_config,
                            train_infernet=True)

            # Update summaries and RunContext.
            metrics.update_autosummaries()
            tflib.autosummary.save_summaries(summary_log, cur_nimg)
            dnnlib.RunContext.get().update(cur_epoch=cur_nimg // 1000,
                                           max_epoch=total_kimg)
            maintenance_time = dnnlib.RunContext.get(
            ).get_last_update_interval() - tick_time

    # Save final snapshot.
    misc.save_pkl((I, G), dnnlib.make_run_dir_path('network-final.pkl'))

    # All done.
    summary_log.close()
Ejemplo n.º 22
0
def training_loop_vae(
        G_args={},  # Options for generator network.
        E_args={},  # Options for encoder network.
        D_args={},  # Options for discriminator network.
        G_opt_args={},  # Options for generator optimizer.
        D_opt_args={},  # Options for discriminator optimizer.
        G_loss_args={},  # Options for generator loss.
        D_loss_args={},  # Options for discriminator loss.
        dataset_args={},  # Options for dataset.load_dataset().
        sched_args={},  # Options for train.TrainingSchedule.
        grid_args={},  # Options for train.setup_snapshot_image_grid().
        metric_arg_list=[],  # Options for MetricGroup.
        tf_config={},  # Options for tflib.init_tf().
        data_dir=None,  # Directory to load datasets from.
        minibatch_repeats=1,  # Number of minibatches to run before adjusting training parameters.
        total_kimg=25000,  # Total length of the training, measured in thousands of real images.
        mirror_augment=False,  # Enable mirror augment?
        drange_net=[
            -1, 1
        ],  # Dynamic range used when feeding image data to the networks.
        image_snapshot_ticks=50,  # How often to save image snapshots? None = only save 'reals.png' and 'fakes-init.png'.
        network_snapshot_ticks=50,  # How often to save network snapshots? None = only save 'networks-final.pkl'.
        save_tf_graph=False,  # Include full TensorFlow computation graph in the tfevents file?
        save_weight_histograms=False,  # Include weight histograms in the tfevents file?
        resume_pkl=None,  # Network pickle to resume training from, None = train from scratch.
        resume_kimg=0.0,  # Assumed training progress at the beginning. Affects reporting and training schedule.
        resume_time=0.0,  # Assumed wallclock time at the beginning. Affects reporting.
        resume_with_new_nets=False,  # Construct new networks according to G_args and D_args before resuming training?
        traversal_grid=False,  # Used for disentangled representation learning.
        n_discrete=0,  # Number of discrete latents in model.
        n_continuous=4,  # Number of continuous latents in model.
        topk_dims_to_show=20,  # Number of top disentant dimensions to show in a snapshot.
        subgroup_sizes_ls=None,
        subspace_sizes_ls=None,
        forward_eg=False,
        n_samples_per=10):  # Number of samples for each line in traversal.

    # Initialize dnnlib and TensorFlow.
    tflib.init_tf(tf_config)
    num_gpus = dnnlib.submit_config.num_gpus

    # If use Discriminator.
    use_D = D_args is not None

    # Load training set.
    training_set = dataset.load_dataset(data_dir=dnnlib.convert_path(data_dir),
                                        verbose=True,
                                        **dataset_args)
    grid_size, grid_reals, grid_labels = misc.setup_snapshot_image_grid(
        training_set, **grid_args)
    grid_fakes = add_outline(grid_reals, width=1)
    misc.save_image_grid(grid_reals,
                         dnnlib.make_run_dir_path('reals.png'),
                         drange=training_set.dynamic_range,
                         grid_size=grid_size)

    # Construct or load networks.
    with tf.device('/gpu:0'):
        if resume_pkl is None or resume_with_new_nets:
            print('Constructing networks...')
            E = tflib.Network('E',
                              num_channels=training_set.shape[0],
                              resolution=training_set.shape[1],
                              label_size=training_set.label_size,
                              input_shape=[None] + training_set.shape,
                              **E_args)
            G = tflib.Network(
                'G',
                num_channels=training_set.shape[0],
                resolution=training_set.shape[1],
                label_size=training_set.label_size,
                input_shape=[None, n_discrete +
                             G_args.latent_size] if not forward_eg else [
                                 None, n_discrete + G_args.latent_size +
                                 sum(subgroup_sizes_ls)
                             ],
                **G_args)
            if use_D:
                D = tflib.Network('D',
                                  num_channels=training_set.shape[0],
                                  resolution=training_set.shape[1],
                                  label_size=training_set.label_size,
                                  input_shape=[None, D_args.latent_size],
                                  **D_args)
        if resume_pkl is not None:
            print('Loading networks from "%s"...' % resume_pkl)
            if use_D:
                rE, rG, rD = misc.load_pkl(resume_pkl)
            else:
                rE, rG = misc.load_pkl(resume_pkl)
            if resume_with_new_nets:
                E.copy_vars_from(rE)
                G.copy_vars_from(rG)
                if use_D:
                    D.copy_vars_from(rD)
            else:
                E = rE
                G = rG
                if use_D:
                    D = rD

    # Print layers and generate initial image snapshot.
    E.print_layers()
    G.print_layers()
    if use_D:
        D.print_layers()
    sched = training_schedule(cur_nimg=total_kimg * 1000,
                              training_set=training_set,
                              **sched_args)
    if traversal_grid:
        if topk_dims_to_show > 0:
            topk_dims = np.arange(min(topk_dims_to_show, n_continuous))
        else:
            topk_dims = np.arange(n_continuous)
        grid_size, grid_latents, grid_labels = get_grid_latents(
            n_discrete, n_continuous, n_samples_per, G, grid_labels, topk_dims)
    else:
        grid_latents = np.random.randn(np.prod(grid_size), *G.input_shape[1:])
    print('grid_size:', grid_size)
    print('grid_latents.shape:', grid_latents.shape)
    print('grid_labels.shape:', grid_labels.shape)
    grid_fakes, _, _, _, _, _, _, lie_vars = get_return_v(
        G.run(append_gfeats(grid_latents, G) if forward_eg else grid_latents,
              grid_labels,
              is_validation=True,
              minibatch_size=sched.minibatch_gpu,
              randomize_noise=True), 8)
    print('Lie_vars:', lie_vars[0])
    grid_fakes = add_outline(grid_fakes, width=1)
    misc.save_image_grid(grid_fakes,
                         dnnlib.make_run_dir_path('fakes_init.png'),
                         drange=drange_net,
                         grid_size=grid_size)

    # Setup training inputs.
    print('Building TensorFlow graph...')
    with tf.name_scope('Inputs'), tf.device('/cpu:0'):
        lrate_in = tf.placeholder(tf.float32, name='lrate_in', shape=[])
        minibatch_size_in = tf.placeholder(tf.int32,
                                           name='minibatch_size_in',
                                           shape=[])
        minibatch_gpu_in = tf.placeholder(tf.int32,
                                          name='minibatch_gpu_in',
                                          shape=[])
        minibatch_multiplier = minibatch_size_in // (minibatch_gpu_in *
                                                     num_gpus)

    # Setup optimizers.
    G_opt_args = dict(G_opt_args)
    G_opt_args['minibatch_multiplier'] = minibatch_multiplier
    G_opt_args['learning_rate'] = lrate_in
    G_opt = tflib.Optimizer(name='TrainG', **G_opt_args)
    if use_D:
        D_opt_args = dict(D_opt_args)
        D_opt_args['minibatch_multiplier'] = minibatch_multiplier
        D_opt_args['learning_rate'] = lrate_in
        D_opt = tflib.Optimizer(name='TrainD', **D_opt_args)

    # Build training graph for each GPU.
    data_fetch_ops = []
    for gpu in range(num_gpus):
        with tf.name_scope('GPU%d' % gpu), tf.device('/gpu:%d' % gpu):

            # Create GPU-specific shadow copies of G and D.
            E_gpu = E if gpu == 0 else E.clone(E.name + '_shadow')
            G_gpu = G if gpu == 0 else G.clone(G.name + '_shadow')
            if use_D:
                D_gpu = D if gpu == 0 else D.clone(D.name + '_shadow')

            # Fetch training data via temporary variables.
            with tf.name_scope('DataFetch'):
                sched = training_schedule(cur_nimg=int(resume_kimg * 1000),
                                          training_set=training_set,
                                          **sched_args)
                reals_var = tf.Variable(
                    name='reals',
                    trainable=False,
                    initial_value=tf.zeros([sched.minibatch_gpu] +
                                           training_set.shape))
                labels_var = tf.Variable(name='labels',
                                         trainable=False,
                                         initial_value=tf.zeros([
                                             sched.minibatch_gpu,
                                             training_set.label_size
                                         ]))
                reals_write, labels_write = training_set.get_minibatch_tf()
                reals_write, labels_write = process_reals(
                    reals_write, labels_write, 0., mirror_augment,
                    training_set.dynamic_range, drange_net)
                reals_write = tf.concat(
                    [reals_write, reals_var[minibatch_gpu_in:]], axis=0)
                labels_write = tf.concat(
                    [labels_write, labels_var[minibatch_gpu_in:]], axis=0)
                data_fetch_ops += [tf.assign(reals_var, reals_write)]
                data_fetch_ops += [tf.assign(labels_var, labels_write)]
                reals_read = reals_var[:minibatch_gpu_in]
                labels_read = labels_var[:minibatch_gpu_in]

            # Evaluate loss functions.
            if use_D:
                with tf.name_scope('G_loss'):
                    G_loss = dnnlib.util.call_func_by_name(
                        E=E_gpu,
                        G=G_gpu,
                        D=D_gpu,
                        opt=G_opt,
                        training_set=training_set,
                        minibatch_size=minibatch_gpu_in,
                        reals=reals_read,
                        labels=labels_read,
                        **G_loss_args)
                with tf.name_scope('D_loss'):
                    D_loss = dnnlib.util.call_func_by_name(
                        E=E_gpu,
                        D=D_gpu,
                        opt=D_opt,
                        training_set=training_set,
                        minibatch_size=minibatch_gpu_in,
                        reals=reals_read,
                        labels=labels_read,
                        **D_loss_args)
            else:
                with tf.name_scope('G_loss'):
                    G_loss = dnnlib.util.call_func_by_name(
                        E=E_gpu,
                        G=G_gpu,
                        opt=G_opt,
                        training_set=training_set,
                        minibatch_size=minibatch_gpu_in,
                        reals=reals_read,
                        labels=labels_read,
                        **G_loss_args)

            # Register gradients.
            EG_gpu_trainables = collections.OrderedDict(
                list(E_gpu.trainables.items()) +
                list(G_gpu.trainables.items()))
            G_opt.register_gradients(tf.reduce_mean(G_loss), EG_gpu_trainables)
            # G_opt.register_gradients(G_loss,
            # EG_gpu_trainables)
            if use_D:
                D_opt.register_gradients(tf.reduce_mean(D_loss),
                                         D_gpu.trainables)
                # D_opt.register_gradients(D_loss,
                # D_gpu.trainables)

    # Setup training ops.
    data_fetch_op = tf.group(*data_fetch_ops)
    G_train_op = G_opt.apply_updates()
    if use_D:
        D_train_op = D_opt.apply_updates()

    # Finalize graph.
    with tf.device('/gpu:0'):
        try:
            peak_gpu_mem_op = tf.contrib.memory_stats.MaxBytesInUse()
        except tf.errors.NotFoundError:
            peak_gpu_mem_op = tf.constant(0)
    tflib.init_uninitialized_vars()

    print('Initializing logs...')
    summary_log = tf.summary.FileWriter(dnnlib.make_run_dir_path())
    if save_tf_graph:
        summary_log.add_graph(tf.get_default_graph())
    if save_weight_histograms:
        G.setup_weight_histograms()
        if use_D:
            D.setup_weight_histograms()
    metrics = metric_base.MetricGroup(metric_arg_list)

    print('Training for %d kimg...\n' % total_kimg)
    dnnlib.RunContext.get().update('',
                                   cur_epoch=resume_kimg,
                                   max_epoch=total_kimg)
    maintenance_time = dnnlib.RunContext.get().get_last_update_interval()
    cur_nimg = int(resume_kimg * 1000)
    cur_tick = -1
    tick_start_nimg = cur_nimg
    prev_lod = -1.0
    running_mb_counter = 0
    while cur_nimg < total_kimg * 1000:
        if dnnlib.RunContext.get().should_stop(): break

        # Choose training parameters and configure training ops.
        sched = training_schedule(cur_nimg=cur_nimg,
                                  training_set=training_set,
                                  **sched_args)
        assert sched.minibatch_size % (sched.minibatch_gpu * num_gpus) == 0
        training_set.configure(sched.minibatch_gpu, 0)

        # Run training ops.
        feed_dict = {
            lrate_in: sched.G_lrate,
            minibatch_size_in: sched.minibatch_size,
            minibatch_gpu_in: sched.minibatch_gpu
        }
        for _repeat in range(minibatch_repeats):
            rounds = range(0, sched.minibatch_size,
                           sched.minibatch_gpu * num_gpus)
            cur_nimg += sched.minibatch_size
            running_mb_counter += 1

            # Fast path without gradient accumulation.
            if len(rounds) == 1:
                tflib.run([G_train_op], feed_dict)
                tflib.run([data_fetch_op], feed_dict)
                if use_D:
                    tflib.run([D_train_op], feed_dict)

            # Slow path with gradient accumulation.
            else:
                for _round in rounds:
                    tflib.run(G_train_op, feed_dict)
                for _round in rounds:
                    tflib.run(data_fetch_op, feed_dict)
                    if use_D:
                        tflib.run(D_train_op, feed_dict)

        # Perform maintenance tasks once per tick.
        done = (cur_nimg >= total_kimg * 1000)
        if cur_tick < 0 or cur_nimg >= tick_start_nimg + sched.tick_kimg * 1000 or done:
            cur_tick += 1
            tick_kimg = (cur_nimg - tick_start_nimg) / 1000.0
            tick_start_nimg = cur_nimg
            tick_time = dnnlib.RunContext.get().get_time_since_last_update()
            total_time = dnnlib.RunContext.get().get_time_since_start(
            ) + resume_time

            # Report progress.
            print(
                'tick %-5d kimg %-8.1f minibatch %-4d time %-12s sec/tick %-7.1f sec/kimg %-7.2f maintenance %-6.1f gpumem %.1f'
                % (autosummary('Progress/tick', cur_tick),
                   autosummary('Progress/kimg', cur_nimg / 1000.0),
                   autosummary('Progress/minibatch', sched.minibatch_size),
                   dnnlib.util.format_time(
                       autosummary('Timing/total_sec', total_time)),
                   autosummary('Timing/sec_per_tick', tick_time),
                   autosummary('Timing/sec_per_kimg', tick_time / tick_kimg),
                   autosummary('Timing/maintenance_sec', maintenance_time),
                   autosummary('Resources/peak_gpu_mem_gb',
                               peak_gpu_mem_op.eval() / 2**30)))
            autosummary('Timing/total_hours', total_time / (60.0 * 60.0))
            autosummary('Timing/total_days', total_time / (24.0 * 60.0 * 60.0))

            # Save snapshots.
            if network_snapshot_ticks is not None and (
                    cur_tick % network_snapshot_ticks == 0 or done):
                pkl = dnnlib.make_run_dir_path('network-snapshot-%06d.pkl' %
                                               (cur_nimg // 1000))
                if use_D:
                    misc.save_pkl((E, G, D), pkl)
                else:
                    misc.save_pkl((E, G), pkl)
                met_outs = metrics.run(pkl,
                                       run_dir=dnnlib.make_run_dir_path(),
                                       data_dir=dnnlib.convert_path(data_dir),
                                       num_gpus=num_gpus,
                                       tf_config=tf_config,
                                       is_vae=True,
                                       use_D=use_D,
                                       Gs_kwargs=dict(is_validation=True))
                if topk_dims_to_show > 0:
                    if 'tpl_per_dim' in met_outs:
                        avg_distance_per_dim = met_outs[
                            'tpl_per_dim']  # shape: (n_continuous)
                        topk_dims = np.argsort(
                            avg_distance_per_dim
                        )[::-1][:topk_dims_to_show]  # shape: (20)
                    else:
                        topk_dims = np.arange(
                            min(topk_dims_to_show, n_continuous))
                else:
                    topk_dims = np.arange(n_continuous)

            if image_snapshot_ticks is not None and (
                    cur_tick % image_snapshot_ticks == 0 or done):
                if traversal_grid:
                    grid_size, grid_latents, grid_labels = get_grid_latents(
                        n_discrete, n_continuous, n_samples_per, G,
                        grid_labels, topk_dims)
                else:
                    grid_latents = np.random.randn(np.prod(grid_size),
                                                   *G.input_shape[1:])

                grid_fakes, _, _, _, _, _, _, lie_vars = get_return_v(
                    G.run(append_gfeats(grid_latents, G)
                          if forward_eg else grid_latents,
                          grid_labels,
                          is_validation=True,
                          minibatch_size=sched.minibatch_gpu,
                          randomize_noise=True), 8)
                print('Lie_vars:', lie_vars[0])
                grid_fakes = add_outline(grid_fakes, width=1)
                misc.save_image_grid(grid_fakes,
                                     dnnlib.make_run_dir_path(
                                         'fakes%06d.png' % (cur_nimg // 1000)),
                                     drange=drange_net,
                                     grid_size=grid_size)

            # Update summaries and RunContext.
            metrics.update_autosummaries()
            tflib.autosummary.save_summaries(summary_log, cur_nimg)
            dnnlib.RunContext.get().update('%.2f' % 0,
                                           cur_epoch=cur_nimg // 1000,
                                           max_epoch=total_kimg)
            maintenance_time = dnnlib.RunContext.get(
            ).get_last_update_interval() - tick_time

    # Save final snapshot.
    if use_D:
        misc.save_pkl((E, G, D), dnnlib.make_run_dir_path('network-final.pkl'))
    else:
        misc.save_pkl((E, G), dnnlib.make_run_dir_path('network-final.pkl'))

    # All done.
    summary_log.close()
    training_set.close()
Ejemplo n.º 23
0
def training_loop(
    G_args={},  # Options for generator network.
    D_args={},  # Options for discriminator network.
    G_opt_args={},  # Options for generator optimizer.
    D_opt_args={},  # Options for discriminator optimizer.
    loss_args={},  # Options for loss.
    dataset_args={},  # Options for dataset.load_dataset().
    sched_args={},  # Options for train.TrainingSchedule.
    grid_args={},  # Options for train.setup_snapshot_image_grid().
    metric_arg_list=[],  # Options for metrics.
    metric_args={},  # Options for MetricGroup.
    tf_config={},  # Options for tflib.init_tf().
    ema_start_kimg=None,  # Start of the exponential moving average. Default to the half-life period.
    G_ema_kimg=10,  # Half-life of the exponential moving average of generator weights.
    minibatch_repeats=4,  # Number of minibatches to run before adjusting training parameters.
    lazy_regularization=False,  # Perform regularization as a separate training step?
    G_reg_interval=4,  # How often the perform regularization for G? Ignored if lazy_regularization=False.
    D_reg_interval=4,  # How often the perform regularization for D? Ignored if lazy_regularization=False.
    reset_opt_for_new_lod=True,  # Reset optimizer internal state (e.g. Adam moments) when new layers are introduced?
    total_kimg=25000,  # Total length of the training, measured in thousands of real images.
    mirror_augment=False,  # Enable mirror augment?
    drange_net=[
        -1, 1
    ],  # Dynamic range used when feeding image data to the networks.
    image_snapshot_ticks=2,  # How often to save image snapshots? None = only save 'reals.png' and 'fakes-init.png'.
    network_snapshot_ticks=1,  # How often to save network snapshots? None = only save 'networks-final.pkl'.
    save_tf_graph=False,  # Include full TensorFlow computation graph in the tfevents file?
    save_weight_histograms=False,  # Include weight histograms in the tfevents file?
    resume_pkl=None,  # Network pickle to resume training from, None = train from scratch.
    resume_kimg=0.0,  # Assumed training progress at the beginning. Affects reporting and training schedule.
    resume_time=0.0,  # Assumed wallclock time at the beginning. Affects reporting.
    resume_with_new_nets=False
):  # Construct new networks according to G_args and D_args before resuming training?

    if ema_start_kimg is None:
        ema_start_kimg = G_ema_kimg

    # Initialize dnnlib and TensorFlow.
    tflib.init_tf(tf_config)
    num_gpus = dnnlib.submit_config.num_gpus

    # Load training set.
    training_set = dataset.load_dataset(verbose=True, **dataset_args)
    grid_size, grid_reals, grid_labels = misc.setup_snapshot_image_grid(
        training_set, **grid_args)
    misc.save_image_grid(grid_reals,
                         dnnlib.make_run_dir_path('reals.png'),
                         drange=training_set.dynamic_range,
                         grid_size=grid_size)

    # Construct or load networks.
    with tf.device('/gpu:0'):
        if resume_pkl is None or resume_with_new_nets:
            print('Constructing networks...')
            G = tflib.Network('G',
                              num_channels=training_set.shape[0],
                              resolution=training_set.shape[1],
                              label_size=training_set.label_size,
                              **G_args)
            D = tflib.Network('D',
                              num_channels=training_set.shape[0],
                              resolution=training_set.shape[1],
                              label_size=training_set.label_size,
                              **D_args)
            Gs = G.clone('Gs')
        if resume_pkl is not None:
            resume_networks = misc.load_pkl(resume_pkl)
            rG, rD, rGs = resume_networks
            if resume_with_new_nets:
                G.copy_vars_from(rG)
                D.copy_vars_from(rD)
                Gs.copy_vars_from(rGs)
            else:
                G, D, Gs = rG, rD, rGs

    # Print layers and generate initial image snapshot.
    G.print_layers()
    D.print_layers()
    sched = training_schedule(cur_nimg=total_kimg * 1000,
                              training_set=training_set,
                              **sched_args)
    grid_latents = np.random.randn(np.prod(grid_size), *G.input_shape[1:])
    grid_fakes = Gs.run(grid_latents,
                        grid_labels,
                        is_validation=True,
                        minibatch_size=sched.minibatch_gpu)
    misc.save_image_grid(grid_fakes,
                         dnnlib.make_run_dir_path('fakes_init.png'),
                         drange=drange_net,
                         grid_size=grid_size)

    # Setup training inputs.
    print('Building TensorFlow graph...')
    with tf.name_scope('Inputs'), tf.device('/cpu:0'):
        lod_in = tf.placeholder(tf.float32, name='lod_in', shape=[])
        G_lrate_in = tf.placeholder(tf.float32, name='G_lrate_in', shape=[])
        D_lrate_in = tf.placeholder(tf.float32, name='D_lrate_in', shape=[])
        minibatch_size_in = tf.placeholder(tf.int32,
                                           name='minibatch_size_in',
                                           shape=[])
        minibatch_gpu_in = tf.placeholder(tf.int32,
                                          name='minibatch_gpu_in',
                                          shape=[])
        run_D_reg_in = tf.placeholder(tf.bool, name='run_D_reg', shape=[])
        minibatch_multiplier = minibatch_size_in // (minibatch_gpu_in *
                                                     num_gpus)
        Gs_beta_mul_in = tf.placeholder(tf.float32,
                                        name='Gs_beta_in',
                                        shape=[])
        Gs_beta = 0.5**tf.div(tf.cast(minibatch_size_in, tf.float32),
                              G_ema_kimg * 1000.0) if G_ema_kimg > 0.0 else 0.0

    # Setup optimizers.
    G_opt_args = dict(G_opt_args)
    D_opt_args = dict(D_opt_args)
    G_opt_args['learning_rate'] = G_lrate_in
    D_opt_args['learning_rate'] = D_lrate_in
    for args in [G_opt_args, D_opt_args]:
        args['minibatch_multiplier'] = minibatch_multiplier
    G_opt = tflib.Optimizer(name='TrainG', **G_opt_args)
    D_opt = tflib.Optimizer(name='TrainD', **D_opt_args)

    # Build training graph for each GPU.
    for gpu in range(num_gpus):
        with tf.name_scope('GPU%d' % gpu), tf.device('/gpu:%d' % gpu):
            with tf.name_scope('DataFetch'):
                reals_read, labels_read = training_set.get_minibatch_tf()
                reals_read = process_reals(reals_read, lod_in, mirror_augment,
                                           training_set.dynamic_range,
                                           drange_net)

            # Create GPU-specific shadow copies of G and D.
            G_gpu = G if gpu == 0 else G.clone(G.name + '_shadow')
            D_gpu = D if gpu == 0 else D.clone(D.name + '_shadow')

            # Evaluate loss functions.
            lod_assign_ops = []
            if 'lod' in G_gpu.vars:
                lod_assign_ops += [tf.assign(G_gpu.vars['lod'], lod_in)]
            if 'lod' in D_gpu.vars:
                lod_assign_ops += [tf.assign(D_gpu.vars['lod'], lod_in)]
            with tf.control_dependencies(lod_assign_ops):
                with tf.name_scope('loss'):
                    G_loss, D_loss, D_reg = dnnlib.util.call_func_by_name(
                        G=G_gpu,
                        D=D_gpu,
                        training_set=training_set,
                        minibatch_size=minibatch_gpu_in,
                        reals=reals_read,
                        real_labels=labels_read,
                        **loss_args)

            # Register gradients.
            if not lazy_regularization:
                if D_reg is not None:
                    D_loss += D_reg
            else:
                if D_reg is not None:
                    D_loss = tf.cond(run_D_reg_in,
                                     lambda: D_loss + D_reg * D_reg_interval,
                                     lambda: D_loss)
            G_opt.register_gradients(tf.reduce_mean(G_loss), G_gpu.trainables)
            D_opt.register_gradients(tf.reduce_mean(D_loss), D_gpu.trainables)

    # Setup training ops.
    Gs_update_op = Gs.setup_as_moving_average_of(G,
                                                 beta=Gs_beta * Gs_beta_mul_in)
    with tf.control_dependencies([Gs_update_op]):
        G_train_op = G_opt.apply_updates()
    D_train_op = D_opt.apply_updates()

    # Finalize graph.
    with tf.device('/gpu:0'):
        try:
            peak_gpu_mem_op = tf.contrib.memory_stats.MaxBytesInUse()
        except tf.errors.NotFoundError:
            peak_gpu_mem_op = tf.constant(0)
    tflib.init_uninitialized_vars()

    print('Initializing logs...')
    summary_log = tf.summary.FileWriter(dnnlib.make_run_dir_path())
    if save_tf_graph:
        summary_log.add_graph(tf.get_default_graph())
    if save_weight_histograms:
        G.setup_weight_histograms()
        D.setup_weight_histograms()
    metrics = metric_base.MetricGroup(metric_arg_list, **metric_args)

    print('Training for %d kimg...\n' % total_kimg)
    dnnlib.RunContext.get().update('',
                                   cur_epoch=resume_kimg,
                                   max_epoch=total_kimg)
    maintenance_time = dnnlib.RunContext.get().get_last_update_interval()
    cur_nimg = int(resume_kimg * 1000)
    cur_tick = -1
    tick_start_nimg = cur_nimg
    prev_lod = -1.0
    running_mb_counter = 0
    while cur_nimg < total_kimg * 1000:
        if dnnlib.RunContext.get().should_stop():
            break

        # Choose training parameters and configure training ops.
        sched = training_schedule(cur_nimg=cur_nimg,
                                  training_set=training_set,
                                  **sched_args)
        assert sched.minibatch_size % (sched.minibatch_gpu * num_gpus) == 0
        training_set.configure(sched.minibatch_gpu)
        if reset_opt_for_new_lod:
            if np.floor(sched.lod) != np.floor(prev_lod) or np.ceil(
                    sched.lod) != np.ceil(prev_lod):
                G_opt.reset_optimizer_state()
                D_opt.reset_optimizer_state()
        prev_lod = sched.lod

        # Run training ops.
        feed_dict = {
            lod_in: sched.lod,
            G_lrate_in: sched.G_lrate,
            D_lrate_in: sched.D_lrate,
            minibatch_size_in: sched.minibatch_size,
            minibatch_gpu_in: sched.minibatch_gpu,
            Gs_beta_mul_in: 1 if cur_nimg >= ema_start_kimg * 1000 else 0,
        }
        for _repeat in range(minibatch_repeats):
            rounds = range(0, sched.minibatch_size,
                           sched.minibatch_gpu * num_gpus)
            run_D_reg = (lazy_regularization
                         and running_mb_counter % D_reg_interval == 0)
            feed_dict[run_D_reg_in] = run_D_reg
            cur_nimg += sched.minibatch_size
            running_mb_counter += 1

            # Fast path without gradient accumulation.
            for _ in rounds:
                tflib.run(G_train_op, feed_dict)
                tflib.run(D_train_op, feed_dict)

        # Perform maintenance tasks once per tick.
        done = (cur_nimg >= total_kimg * 1000)
        if cur_tick < 0 or cur_nimg >= tick_start_nimg + sched.tick_kimg * 1000 or done:
            cur_tick += 1
            tick_kimg = (cur_nimg - tick_start_nimg) / 1000.0
            tick_start_nimg = cur_nimg
            tick_time = dnnlib.RunContext.get().get_time_since_last_update()
            total_time = dnnlib.RunContext.get().get_time_since_start(
            ) + resume_time

            # Report progress.
            print(
                'tick %-5d kimg %-8.1f lod %-5.2f minibatch %-4d time %-12s sec/tick %-7.1f sec/kimg %-7.2f maintenance %-6.1f gpumem %.1f'
                % (autosummary('Progress/tick', cur_tick),
                   autosummary('Progress/kimg', cur_nimg / 1000.0),
                   autosummary('Progress/lod', sched.lod),
                   autosummary('Progress/minibatch', sched.minibatch_size),
                   dnnlib.util.format_time(
                       autosummary('Timing/total_sec', total_time)),
                   autosummary('Timing/sec_per_tick', tick_time),
                   autosummary('Timing/sec_per_kimg', tick_time / tick_kimg),
                   autosummary('Timing/maintenance_sec', maintenance_time),
                   autosummary('Resources/peak_gpu_mem_gb',
                               peak_gpu_mem_op.eval() / 2**30)))
            autosummary('Timing/total_hours', total_time / (60.0 * 60.0))
            autosummary('Timing/total_days', total_time / (24.0 * 60.0 * 60.0))

            # Save snapshots.
            if image_snapshot_ticks is not None and (
                    cur_tick % image_snapshot_ticks == 0 or done):
                grid_fakes = Gs.run(grid_latents,
                                    grid_labels,
                                    is_validation=True,
                                    minibatch_size=sched.minibatch_gpu)
                misc.save_image_grid(grid_fakes,
                                     dnnlib.make_run_dir_path(
                                         'fakes%06d.png' % (cur_nimg // 1000)),
                                     drange=drange_net,
                                     grid_size=grid_size)
            if network_snapshot_ticks is not None and (
                    cur_tick % network_snapshot_ticks == 0 or done):
                pkl = dnnlib.make_run_dir_path('network-snapshot-%06d.pkl' %
                                               (cur_nimg // 1000))
                misc.save_pkl((G, D, Gs), pkl)
                metrics.run(pkl,
                            run_dir=dnnlib.make_run_dir_path(),
                            num_gpus=num_gpus,
                            tf_config=tf_config)

            # Update summaries and RunContext.
            metrics.update_autosummaries()
            tflib.autosummary.save_summaries(summary_log, cur_nimg)
            dnnlib.RunContext.get().update('%.2f' % sched.lod,
                                           cur_epoch=cur_nimg // 1000,
                                           max_epoch=total_kimg)
            maintenance_time = dnnlib.RunContext.get(
            ).get_last_update_interval() - tick_time

    # Save final snapshot.
    misc.save_pkl((G, D, Gs), dnnlib.make_run_dir_path('network-final.pkl'))

    # All done.
    summary_log.close()
    training_set.close()
Ejemplo n.º 24
0
def embed(batch_size,
          resolution,
          imgs,
          network,
          iteration,
          result_dir,
          seed=6600):
    tf.reset_default_graph()
    print('Loading networks from "%s"...' % network)
    tflib.init_tf()
    _, _, G = pretrained_networks.load_networks(network)
    img_in = tf.placeholder(tf.float32)
    opt = tf.train.AdamOptimizer(learning_rate=0.01,
                                 beta1=0.9,
                                 beta2=0.999,
                                 epsilon=1e-8)
    noise_vars = [
        var for name, var in G.components.synthesis.vars.items()
        if name.startswith('noise')
    ]
    alpha_vars = [
        var for name, var in G.components.synthesis.vars.items()
        if name.endswith('alpha')
    ]
    alpha_eval = [alpha.eval() for alpha in alpha_vars]

    G_kwargs = dnnlib.EasyDict()
    G_kwargs.randomize_noise = False
    G_syn = G.components.synthesis

    rnd = np.random.RandomState(seed)
    dlatent_avg = [
        var for name, var in G.vars.items() if name.startswith('dlatent_avg')
    ][0].eval()
    dlatent_avg = np.expand_dims(np.expand_dims(dlatent_avg, 0), 1)
    dlatent_avg = dlatent_avg.repeat(12, 1)
    dlatent = tf.get_variable('dlatent',
                              dtype=tf.float32,
                              initializer=tf.constant(dlatent_avg),
                              trainable=True)
    synth_img = G_syn.get_output_for(dlatent, is_training=False, **G_kwargs)
    # synth_img = (synth_img + 1.0) / 2.0

    with tf.variable_scope('mse_loss'):
        mse_loss = tf.reduce_mean(tf.square(img_in - synth_img))
    with tf.variable_scope('perceptual_loss'):
        vgg_in = tf.concat([img_in, synth_img], 0)
        tf.keras.backend.set_image_data_format('channels_first')
        vgg = tf.keras.applications.VGG16(
            include_top=False,
            input_tensor=vgg_in,
            input_shape=(3, 128, 128),
            weights='/gdata2/fengrl/metrics/vgg.h5',
            pooling=None)
        h1 = vgg.get_layer('block1_conv1').output
        h2 = vgg.get_layer('block1_conv2').output
        h3 = vgg.get_layer('block3_conv2').output
        h4 = vgg.get_layer('block4_conv2').output
        pcep_loss = tf.reduce_mean(tf.square(h1[0] - h1[1])) + tf.reduce_mean(tf.square(h2[0] - h2[1])) + \
                    tf.reduce_mean(tf.square(h3[0] - h3[1])) + tf.reduce_mean(tf.square(h4[0] - h4[1]))
    loss = 0.5 * mse_loss + 0.5 * pcep_loss
    with tf.control_dependencies([loss]):
        train_op = opt.minimize(loss, var_list=[dlatent])
    reset_opt = tf.variables_initializer(opt.variables())
    reset_dl = tf.variables_initializer([dlatent])

    tflib.init_uninitialized_vars()
    # rnd = np.random.RandomState(seed)
    tflib.set_vars(
        {var: rnd.randn(*var.shape.as_list())
         for var in noise_vars})  # [height, width]
    idx = 0
    metrics_l = []
    metrics_p = []
    metrics_m = []
    metrics_d = []

    metrics_args = [metric_defaults[x] for x in ['fid50k', 'ppl_wend']]

    metrics_fun = metric_base.MetricGroup(metrics_args)
    for temperature in [0.2, 0.5, 1.0, 1.5, 2.0, 10.0]:
        tflib.set_vars({
            alpha: scale_alpha(alpha_np, temperature)
            for alpha, alpha_np in zip(alpha_vars, alpha_eval)
        })
        # misc.save_pkl((G, G, G), os.path.join(result_dir, 'temp%f.pkl' % temperature))
        # metrics_fun.run(os.path.join(result_dir, 'temp%f.pkl' % temperature), run_dir=result_dir,
        #                 data_dir='/gdata/fengrl/noise_test_dset/tfrecords',
        #                 dataset_args=dnnlib.EasyDict(tfrecord_dir='ffhq-128', shuffle_mb=0),
        #                 mirror_augment=True, num_gpus=1)
        for img in imgs:
            img = np.expand_dims(img, 0)
            loss_list = []
            p_loss_list = []
            m_loss_list = []
            dl_list = []
            si_list = []
            tflib.run([reset_opt, reset_dl])
            for i in range(iteration):
                loss_, p_loss_, m_loss_, dl_, si_, _ = tflib.run(
                    [loss, pcep_loss, mse_loss, dlatent, synth_img, train_op],
                    {img_in: img})
                loss_list.append(loss_)
                p_loss_list.append(p_loss_)
                m_loss_list.append(m_loss_)
                dl_loss_ = np.sum(np.square(dl_ - dlatent_avg))
                dl_list.append(dl_loss_)
                if i % 500 == 0:
                    si_list.append(si_)
                if i % 100 == 0:
                    print(
                        'Temperature %f, idx %d, Loss %f, mse %f, ppl %f, dl %f, step %d'
                        % (temperature, idx, loss_, m_loss_, p_loss_, dl_loss_,
                           i))
            print('Temperature %f, idx %d, loss: %f, ppl: %f, mse: %f, d: %f' %
                  (temperature, idx, loss_list[-1], p_loss_list[-1],
                   m_loss_list[-1], dl_list[-1]))
            metrics_l.append(loss_list[-1])
            metrics_p.append(p_loss_list[-1])
            metrics_m.append(m_loss_list[-1])
            metrics_d.append(dl_list[-1])
            misc.save_image_grid(np.concatenate(si_list, 0),
                                 os.path.join(
                                     result_dir,
                                     'temp%fsi%d.png' % (temperature, idx)),
                                 drange=[-1, 1])
            misc.save_image_grid(
                si_list[-1],
                os.path.join(result_dir,
                             'temp%fsifinal%d.png' % (temperature, idx)),
                drange=[-1, 1])
            with open(
                    os.path.join(result_dir,
                                 'temp%fmetric_l%d.txt' % (temperature, idx)),
                    'w') as f:
                for l_ in loss_list:
                    f.write(str(l_) + '\n')
            with open(
                    os.path.join(result_dir,
                                 'temp%fmetric_p%d.txt' % (temperature, idx)),
                    'w') as f:
                for l_ in p_loss_list:
                    f.write(str(l_) + '\n')
            with open(
                    os.path.join(result_dir,
                                 'temp%fmetric_m%d.txt' % (temperature, idx)),
                    'w') as f:
                for l_ in m_loss_list:
                    f.write(str(l_) + '\n')
            with open(
                    os.path.join(result_dir,
                                 'temp%fmetric_d%d.txt' % (temperature, idx)),
                    'w') as f:
                for l_ in dl_list:
                    f.write(str(l_) + '\n')
            idx += 1

        l_mean = np.mean(metrics_l)
        p_mean = np.mean(metrics_p)
        m_mean = np.mean(metrics_m)
        d_mean = np.mean(metrics_d)
        with open(
                os.path.join(result_dir,
                             'Temp%fmetric_lmpd.txt' % temperature), 'w') as f:
            for i in range(len(metrics_l)):
                f.write(
                    str(metrics_l[i]) + '    ' + str(metrics_m[i]) + '    ' +
                    str(metrics_p[i]) + '    ' + str(metrics_d[i]) + '\n')
        print(
            'Overall metrics: temp %f, loss_mean %f, ppl_mean %f, mse_mean %f, d_mean %f'
            % (temperature, l_mean, p_mean, m_mean, d_mean))
        with open(os.path.join(result_dir, 'mean_metrics'), 'a') as f:
            f.write('Temperature %f\n' % temperature)
            f.write('loss %f\n' % l_mean)
            f.write('mse %f\n' % m_mean)
            f.write('ppl %f\n' % p_mean)
            f.write('dl %f\n' % d_mean)
Ejemplo n.º 25
0
import numpy as np
import matplotlib.pyplot as plt
import dnnlib.tflib as tflib
import training.misc as misc
import os
from training import dataset
import tensorflow as tf

tfrecord_dir = '../../datasets/cars_v7_512'

tflib.init_tf()
dset = dataset.TFRecordDataset(tfrecord_dir,
                               max_label_size='full',
                               repeat=False,
                               shuffle_mb=0)
tflib.init_uninitialized_vars()
reals, labels = dset.get_minibatch_np(100)
indices = np.array([18, 1, 69, 25, 16, 0, 7, 3], dtype=np.uint16)
reals = np.take(reals, indices, axis=0)
labels = np.take(labels, indices, axis=0)

version = 'v7'

if version is 'v7':
    offsets = [1, 67, 12, 18, 10, 2, 5, 6]
if version is 'v5':
    offsets = [1, 67, 12, 18, 10, 8, 5, 6]
label_names = [
    'Car', 'Model', 'Color', 'Manufacturer', 'Body', 'Rotation', 'Ratio',
    'Background'
]
Ejemplo n.º 26
0
def training_auto_loop(
    Enc_args={},  # Options for encoder network.
    Dec_args={},  # Options for decoder network.
    opt_args={},  # Options for encoder optimizer.
    loss_args={},  # Options for discriminator loss.
    dataset_args={},  # Options for dataset.load_dataset().
    sched_args={},  # Options for train.TrainingSchedule.
    grid_args={},  # Options for train.setup_snapshot_image_grid().
    tf_config={},  # Options for tflib.init_tf().
    data_dir=None,  # Directory to load datasets from.
    minibatch_repeats=4,  # Number of minibatches to run before adjusting training parameters.
    total_kimg=25000,  # Total length of the training, measured in thousands of real images.
    drange_net=[
        -1, 1
    ],  # Dynamic range used when feeding image data to the networks.
    image_snapshot_ticks=50,  # How often to save image snapshots? None = only save 'reals.png' and 'fakes-init.png'.
    network_snapshot_ticks=50,  # How often to save network snapshots? None = only save 'networks-final.pkl'.
    save_tf_graph=False,  # Include full TensorFlow computation graph in the tfevents file?
    save_weight_histograms=False
):  # Include weight histograms in the tfevents file?

    # Initialize dnnlib and TensorFlow.
    tflib.init_tf(tf_config)
    num_gpus = dnnlib.submit_config.num_gpus

    # Load training set.
    training_set = dataset.load_dataset(data_dir=dnnlib.convert_path(data_dir),
                                        verbose=True,
                                        **dataset_args)
    grid_size, grid_reals, _ = misc.setup_snapshot_image_grid(
        training_set, **grid_args)
    misc.save_image_grid(grid_reals,
                         dnnlib.make_run_dir_path('reals.png'),
                         drange=training_set.dynamic_range,
                         grid_size=grid_size)

    # Construct or load networks.
    with tf.device('/gpu:0'):
        print('Constructing networks...')
        Enc = tflib.Network('Encoder',
                            resolution=training_set.shape[1],
                            out_channels=16,
                            **Enc_args)
        Dec = tflib.Network('Decoder',
                            resolution=training_set.shape[1] // 4,
                            in_channels=16,
                            **Dec_args)

    # Print layers and generate initial image snapshot.
    Enc.print_layers()
    Dec.print_layers()
    sched = sched_args
    sched.tick_kimg = 4
    grid_codes = Enc.run((grid_reals / 127.5) - 1.0,
                         minibatch_size=sched.minibatch_gpu)
    grid_fakes = Dec.run(grid_codes, minibatch_size=sched.minibatch_gpu)
    misc.save_image_grid(grid_fakes,
                         dnnlib.make_run_dir_path('fakes_init.png'),
                         drange=drange_net,
                         grid_size=grid_size)

    # Setup training inputs.
    print('Building TensorFlow graph...')
    with tf.name_scope('Inputs'), tf.device('/cpu:0'):
        lrate_in = tf.placeholder(tf.float32, name='lrate_in', shape=[])
        minibatch_size_in = tf.placeholder(tf.int32,
                                           name='minibatch_size_in',
                                           shape=[])
        minibatch_gpu_in = tf.placeholder(tf.int32,
                                          name='minibatch_gpu_in',
                                          shape=[])
        minibatch_multiplier = minibatch_size_in // (minibatch_gpu_in *
                                                     num_gpus)

    # Setup optimizers.
    opt_args = dict(opt_args)
    opt_args['minibatch_multiplier'] = minibatch_multiplier
    opt_args['learning_rate'] = lrate_in
    opt = tflib.Optimizer(name='TrainAuto', **opt_args)

    # Build training graph for each GPU.
    data_fetch_ops = []
    for gpu in range(num_gpus):
        with tf.name_scope('GPU%d' % gpu), tf.device('/gpu:%d' % gpu):

            # Create GPU-specific shadow copies of Enc and Dec.
            Enc_gpu = Enc if gpu == 0 else Enc.clone(Enc.name + '_shadow')
            Dec_gpu = Dec if gpu == 0 else Dec.clone(Dec.name + '_shadow')

            # Fetch training data via temporary variables.
            with tf.name_scope('DataFetch'):
                reals_var = tf.Variable(
                    name='reals',
                    trainable=False,
                    initial_value=tf.zeros([sched.minibatch_gpu] +
                                           training_set.shape))
                reals_write, labels_write = training_set.get_minibatch_tf()
                reals_write, _ = process_reals(reals_write, labels_write, 0.0,
                                               False,
                                               training_set.dynamic_range,
                                               drange_net)
                reals_write = tf.concat(
                    [reals_write, reals_var[minibatch_gpu_in:]], axis=0)
                data_fetch_ops += [tf.assign(reals_var, reals_write)]
                reals_read = reals_var[:minibatch_gpu_in]

            # Evaluate loss functions.
            with tf.name_scope('loss'):
                loss = dnnlib.util.call_func_by_name(Enc=Enc_gpu,
                                                     Dec=Dec_gpu,
                                                     opt=opt,
                                                     reals=reals_read,
                                                     **loss_args)

            # Register gradients.
            opt.register_gradients(
                tf.reduce_mean(loss),
                list(Enc_gpu.trainables.values()) +
                list(Dec_gpu.trainables.values()))

    # Setup training ops.
    data_fetch_op = tf.group(*data_fetch_ops)
    train_op = opt.apply_updates()

    # Finalize graph.
    with tf.device('/gpu:0'):
        try:
            peak_gpu_mem_op = tf.contrib.memory_stats.MaxBytesInUse()
        except tf.errors.NotFoundError:
            peak_gpu_mem_op = tf.constant(0)
    tflib.init_uninitialized_vars()

    print('Initializing logs...')
    summary_log = tf.summary.FileWriter(dnnlib.make_run_dir_path())
    if save_tf_graph:
        summary_log.add_graph(tf.get_default_graph())
    if save_weight_histograms:
        Enc.setup_weight_histograms()
        Dec.setup_weight_histograms()

    print('Training for %d kimg...\n' % total_kimg)
    dnnlib.RunContext.get().update('', max_epoch=total_kimg)
    maintenance_time = dnnlib.RunContext.get().get_last_update_interval()
    cur_nimg = 0
    cur_tick = -1
    tick_start_nimg = cur_nimg
    while cur_nimg < total_kimg * 1000:
        if dnnlib.RunContext.get().should_stop(): break

        # Choose training parameters and configure training ops.
        assert sched.minibatch_size % (sched.minibatch_gpu * num_gpus) == 0
        training_set.configure(sched.minibatch_gpu)

        # Run training ops.
        feed_dict = {
            lrate_in: sched.lrate,
            minibatch_size_in: sched.minibatch_size,
            minibatch_gpu_in: sched.minibatch_gpu
        }
        for _repeat in range(minibatch_repeats):
            rounds = range(0, sched.minibatch_size,
                           sched.minibatch_gpu * num_gpus)
            cur_nimg += sched.minibatch_size

            # Fast path without gradient accumulation.
            if len(rounds) == 1:
                tflib.run(data_fetch_op, feed_dict)
                tflib.run(train_op, feed_dict)

            # Slow path with gradient accumulation.
            else:
                for _round in rounds:
                    tflib.run(data_fetch_op, feed_dict)
                    tflib.run(train_op, feed_dict)

        # Perform maintenance tasks once per tick.
        done = (cur_nimg >= total_kimg * 1000)
        if cur_tick < 0 or cur_nimg >= tick_start_nimg + sched.tick_kimg * 1000 or done:
            cur_tick += 1
            tick_kimg = (cur_nimg - tick_start_nimg) / 1000.0
            tick_start_nimg = cur_nimg
            tick_time = dnnlib.RunContext.get().get_time_since_last_update()
            total_time = dnnlib.RunContext.get().get_time_since_start()

            # Report progress.
            print(
                'tick %-5d kimg %-8.1f minibatch %-4d time %-12s sec/tick %-7.1f sec/kimg %-7.2f maintenance %-6.1f gpumem %.1f'
                % (autosummary('Progress/tick', cur_tick),
                   autosummary('Progress/kimg', cur_nimg / 1000.0),
                   autosummary('Progress/minibatch', sched.minibatch_size),
                   dnnlib.util.format_time(
                       autosummary('Timing/total_sec', total_time)),
                   autosummary('Timing/sec_per_tick', tick_time),
                   autosummary('Timing/sec_per_kimg', tick_time / tick_kimg),
                   autosummary('Timing/maintenance_sec', maintenance_time),
                   autosummary('Resources/peak_gpu_mem_gb',
                               peak_gpu_mem_op.eval() / 2**30)))
            autosummary('Timing/total_hours', total_time / (60.0 * 60.0))
            autosummary('Timing/total_days', total_time / (24.0 * 60.0 * 60.0))

            # Save snapshots.
            if image_snapshot_ticks is not None and (
                    cur_tick % image_snapshot_ticks == 0 or done):
                grid_codes = Enc.run((grid_reals / 127.5) - 1.0,
                                     minibatch_size=sched.minibatch_gpu)
                grid_fakes = Dec.run(grid_codes,
                                     minibatch_size=sched.minibatch_gpu)
                misc.save_image_grid(grid_fakes,
                                     dnnlib.make_run_dir_path(
                                         'fakes%06d.png' % (cur_nimg // 1000)),
                                     drange=drange_net,
                                     grid_size=grid_size)
            if network_snapshot_ticks is not None and (
                    cur_tick % network_snapshot_ticks == 0 or done):
                pkl = dnnlib.make_run_dir_path('network-snapshot-%06d.pkl' %
                                               (cur_nimg // 1000))
                misc.save_pkl((Enc, Dec), pkl)

            # Update summaries and RunContext.
            tflib.autosummary.save_summaries(summary_log, cur_nimg)
            dnnlib.RunContext.get().update('%.2f' % 0.0,
                                           cur_epoch=cur_nimg // 1000,
                                           max_epoch=total_kimg)
            maintenance_time = dnnlib.RunContext.get(
            ).get_last_update_interval() - tick_time

    # Save final snapshot.
    misc.save_pkl((Enc, Dec), dnnlib.make_run_dir_path('network-final.pkl'))

    # All done.
    summary_log.close()
    training_set.close()
Ejemplo n.º 27
0
def main():
    """Main function."""
    args = parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
    assert os.path.exists(args.image_list)
    image_list_name = os.path.splitext(os.path.basename(args.image_list))[0]
    output_dir = args.output_dir or f'results/inversion/{image_list_name}'
    logger = setup_logger(output_dir, 'inversion.log', 'inversion_logger')

    logger.info(f'Loading model.')
    tflib.init_tf({'rnd.np_random_seed': 1000})
    with open(args.model_path, 'rb') as f:
        E, _, _, Gs = pickle.load(f)

    # Get input size.
    image_size = E.input_shape[2]
    assert image_size == E.input_shape[3]

    # Build graph.
    logger.info(f'Building graph.')
    sess = tf.get_default_session()
    input_shape = E.input_shape
    input_shape[0] = args.batch_size
    x = tf.placeholder(tf.float32, shape=input_shape, name='real_image')
    x_255 = (tf.transpose(x, [0, 2, 3, 1]) + 1) / 2 * 255
    latent_shape = Gs.components.synthesis.input_shape
    latent_shape[0] = args.batch_size
    wp = tf.get_variable(shape=latent_shape, name='latent_code')
    x_rec = Gs.components.synthesis.get_output_for(wp, randomize_noise=False)
    x_rec_255 = (tf.transpose(x_rec, [0, 2, 3, 1]) + 1) / 2 * 255
    if args.random_init:
        logger.info(f'  Use random initialization for optimization.')
        wp_rnd = tf.random.normal(shape=latent_shape, name='latent_code_init')
        setter = tf.assign(wp, wp_rnd)
    else:
        logger.info(
            f'  Use encoder output as the initialization for optimization.')
        w_enc = E.get_output_for(x, is_training=False)
        wp_enc = tf.reshape(w_enc, latent_shape)
        setter = tf.assign(wp, wp_enc)

    # Settings for optimization.
    logger.info(f'Setting configuration for optimization.')
    perceptual_model = PerceptualModel([image_size, image_size], False)
    x_feat = perceptual_model(x_255)
    x_rec_feat = perceptual_model(x_rec_255)
    loss_feat = tf.reduce_mean(tf.square(x_feat - x_rec_feat), axis=[1])
    loss_pix = tf.reduce_mean(tf.square(x - x_rec), axis=[1, 2, 3])
    if args.domain_regularizer:
        logger.info(f'  Involve encoder for optimization.')
        w_enc_new = E.get_output_for(x_rec, is_training=False)
        wp_enc_new = tf.reshape(w_enc_new, latent_shape)
        loss_enc = tf.reduce_mean(tf.square(wp - wp_enc_new), axis=[1, 2])
    else:
        logger.info(f'  Do NOT involve encoder for optimization.')
        loss_enc = 0
    loss = (loss_pix + args.loss_weight_feat * loss_feat +
            args.loss_weight_enc * loss_enc)
    optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    train_op = optimizer.minimize(loss, var_list=[wp])
    tflib.init_uninitialized_vars()

    # Load image list.
    logger.info(f'Loading image list.')
    image_list = []
    with open(args.image_list, 'r') as f:
        for line in f:
            image_list.append(line.strip())

    # Invert images.
    logger.info(f'Start inversion.')
    save_interval = args.num_iterations // args.num_results
    headers = ['Name', 'Original Image', 'Encoder Output']
    for step in range(1, args.num_iterations + 1):
        if step == args.num_iterations or step % save_interval == 0:
            headers.append(f'Step {step:06d}')
    viz_size = None if args.viz_size == 0 else args.viz_size
    visualizer = HtmlPageVisualizer(num_rows=len(image_list),
                                    num_cols=len(headers),
                                    viz_size=viz_size)
    visualizer.set_headers(headers)

    images = np.zeros(input_shape, np.uint8)
    names = ['' for _ in range(args.batch_size)]
    latent_codes_enc = []
    latent_codes = []
    for img_idx in tqdm(range(0, len(image_list), args.batch_size),
                        leave=False):
        # Load inputs.
        batch = image_list[img_idx:img_idx + args.batch_size]
        for i, image_path in enumerate(batch):
            image = resize_image(load_image(image_path),
                                 (image_size, image_size))
            images[i] = np.transpose(image, [2, 0, 1])
            names[i] = os.path.splitext(os.path.basename(image_path))[0]
        inputs = images.astype(np.float32) / 255 * 2.0 - 1.0
        # Run encoder.
        sess.run([setter], {x: inputs})
        outputs = sess.run([wp, x_rec])
        latent_codes_enc.append(outputs[0][0:len(batch)])
        outputs[1] = adjust_pixel_range(outputs[1])
        for i, _ in enumerate(batch):
            image = np.transpose(images[i], [1, 2, 0])
            save_image(f'{output_dir}/{names[i]}_ori.png', image)
            save_image(f'{output_dir}/{names[i]}_enc.png', outputs[1][i])
            visualizer.set_cell(i + img_idx, 0, text=names[i])
            visualizer.set_cell(i + img_idx, 1, image=image)
            visualizer.set_cell(i + img_idx, 2, image=outputs[1][i])
        # Optimize latent codes.
        col_idx = 3
        for step in tqdm(range(1, args.num_iterations + 1), leave=False):
            sess.run(train_op, {x: inputs})
            if step == args.num_iterations or step % save_interval == 0:
                outputs = sess.run([wp, x_rec])
                outputs[1] = adjust_pixel_range(outputs[1])
                for i, _ in enumerate(batch):
                    if step == args.num_iterations:
                        save_image(f'{output_dir}/{names[i]}_inv.png',
                                   outputs[1][i])
                    visualizer.set_cell(i + img_idx,
                                        col_idx,
                                        image=outputs[1][i])
                col_idx += 1
        latent_codes.append(outputs[0][0:len(batch)])

    # Save results.
    os.system(f'cp {args.image_list} {output_dir}/image_list.txt')
    np.save(f'{output_dir}/encoded_codes.npy',
            np.concatenate(latent_codes_enc, axis=0))
    np.save(f'{output_dir}/inverted_codes.npy',
            np.concatenate(latent_codes, axis=0))
    visualizer.save(f'{output_dir}/inversion.html')
Ejemplo n.º 28
0
def training_loop_refinement(
    G_args={},  # Options for generator network.
    D_args={},  # Options for discriminator network.
    G_opt_args={},  # Options for generator optimizer.
    D_opt_args={},  # Options for discriminator optimizer.
    G_loss_args={},  # Options for generator loss.
    D_loss_args={},  # Options for discriminator loss.
    dataset_args={},  # Options for dataset.load_dataset().
    sched_args={},  # Options for train.TrainingSchedule.
    grid_args={},  # Options for train.setup_snapshot_image_grid().
    metric_arg_list=[],  # Options for MetricGroup.
    tf_config={},  # Options for tflib.init_tf().
    data_dir=None,  # Directory to load datasets from.
    G_smoothing_kimg=10.0,  # Half-life of the running average of generator weights.
    minibatch_repeats=4,  # Number of minibatches to run before adjusting training parameters.
    lazy_regularization=True,  # Perform regularization as a separate training step?
    G_reg_interval=4,  # How often the perform regularization for G? Ignored if lazy_regularization=False.
    D_reg_interval=16,  # How often the perform regularization for D? Ignored if lazy_regularization=False.
    reset_opt_for_new_lod=True,  # Reset optimizer internal state (e.g. Adam moments) when new layers are introduced?
    total_kimg=25000,  # Total length of the training, measured in thousands of real images.
    mirror_augment=False,  # Enable mirror augment?
    drange_net=[
        -1, 1
    ],  # Dynamic range used when feeding image data to the networks.
    image_snapshot_ticks=50,  # How often to save image snapshots? None = only save 'reals.png' and 'fakes-init.png'.
    network_snapshot_ticks=50,  # How often to save network snapshots? None = only save 'networks-final.pkl'.
    save_tf_graph=False,  # Include full TensorFlow computation graph in the tfevents file?
    save_weight_histograms=True,  # Include weight histograms in the tfevents file?
    resume_pkl=None,  # Network pickle to resume training from, None = train from scratch.
    resume_kimg=0.0,  # Assumed training progress at the beginning. Affects reporting and training schedule.
    resume_time=0.0,  # Assumed wallclock time at the beginning. Affects reporting.
    resume_with_new_nets=False
):  # Construct new networks according to G_args and D_args before resuming training?

    # Initialize dnnlib and TensorFlow.
    tflib.init_tf(tf_config)
    num_gpus = dnnlib.submit_config.num_gpus

    # Load training set.
    training_set = dataset.load_dataset(data_dir=dnnlib.convert_path(data_dir),
                                        verbose=True,
                                        **dataset_args)
    grid_size, grid_reals, grid_labels = misc.setup_snapshot_image_grid(
        training_set, **grid_args)
    misc.save_image_grid(grid_reals,
                         dnnlib.make_run_dir_path('reals.png'),
                         drange=training_set.dynamic_range,
                         grid_size=grid_size)

    # Construct or load networks.
    with tf.device('/gpu:0'):
        if resume_pkl is None or resume_with_new_nets:
            print('Constructing networks...')
            G = tflib.Network('G',
                              num_channels=training_set.shape[0],
                              resolution=training_set.shape[1],
                              label_size=training_set.label_size,
                              **G_args)
            Gs = G.clone('Gs')
        if resume_pkl is not None:
            print('Loading networks from "%s"...' % resume_pkl)
            _rG, _rD, rGs = misc.load_pkl(resume_pkl)
            del _rD, _rG
            if resume_with_new_nets:
                G.copy_vars_from(rGs)
                Gs.copy_vars_from(rGs)
                del rGs
            else:
                G = rG
                Gs = rGs

    # Set constant noise input for both G and Gs
    if G_args.get("randomize_noise", None) == False:
        noise_vars = [
            var for name, var in G.components.synthesis.vars.items()
            if name.startswith('noise')
        ]
        rnd = np.random.RandomState(123)
        tflib.set_vars(
            {var: rnd.randn(*var.shape.as_list())
             for var in noise_vars})  # [height, width]

        noise_vars = [
            var for name, var in Gs.components.synthesis.vars.items()
            if name.startswith('noise')
        ]
        rnd = np.random.RandomState(123)
        tflib.set_vars(
            {var: rnd.randn(*var.shape.as_list())
             for var in noise_vars})  # [height, width]

    # TESTS
    # from PIL import Image
    # reals, latents = training_set.get_minibatch_np(4)
    # reals = np.transpose(reals, [0, 2, 3, 1])
    # Image.fromarray(reals[0], 'RGB').save("test_reals.png")

    # labels = training_set.get_random_labels_np(4)
    # Gs_kwargs = dnnlib.EasyDict()
    # Gs_kwargs.output_transform = dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True)
    # fakes = Gs.run(latents, labels, minibatch_size=4, **Gs_kwargs)
    # Image.fromarray(fakes[0], 'RGB').save("test_fakes_Gs_new.png")
    # fakes = G.run(latents, labels, minibatch_size=4, **Gs_kwargs)
    # Image.fromarray(fakes[0], 'RGB').save("test_fakes_G_new.png")

    # Print layers and generate initial image snapshot.
    G.print_layers()
    sched = training_schedule(cur_nimg=total_kimg * 1000,
                              training_set=training_set,
                              **sched_args)
    grid_latents = np.random.randn(np.prod(grid_size), *G.input_shape[1:])
    grid_fakes = Gs.run(grid_latents,
                        grid_labels,
                        is_validation=True,
                        minibatch_size=sched.minibatch_gpu)
    misc.save_image_grid(grid_fakes,
                         dnnlib.make_run_dir_path('fakes_init.png'),
                         drange=drange_net,
                         grid_size=grid_size)

    # Setup training inputs.
    print('Building TensorFlow graph...')
    with tf.name_scope('Inputs'), tf.device('/cpu:0'):
        lod_in = tf.placeholder(tf.float32, name='lod_in', shape=[])
        lrate_in = tf.placeholder(tf.float32, name='lrate_in', shape=[])
        minibatch_size_in = tf.placeholder(tf.int32,
                                           name='minibatch_size_in',
                                           shape=[])
        minibatch_gpu_in = tf.placeholder(tf.int32,
                                          name='minibatch_gpu_in',
                                          shape=[])
        minibatch_multiplier = minibatch_size_in // (minibatch_gpu_in *
                                                     num_gpus)
        Gs_beta = 0.5**tf.div(tf.cast(minibatch_size_in,
                                      tf.float32), G_smoothing_kimg *
                              1000.0) if G_smoothing_kimg > 0.0 else 0.0

    # Setup optimizers.
    G_opt_args = dict(G_opt_args)
    for args, reg_interval in [(G_opt_args, G_reg_interval)]:
        args['minibatch_multiplier'] = minibatch_multiplier
        args['learning_rate'] = lrate_in
        if lazy_regularization:
            mb_ratio = reg_interval / (reg_interval + 1)
            args['learning_rate'] *= mb_ratio
            if 'beta1' in args: args['beta1'] **= mb_ratio
            if 'beta2' in args: args['beta2'] **= mb_ratio
    G_opt = tflib.Optimizer(name='TrainG', **G_opt_args)
    G_reg_opt = tflib.Optimizer(name='RegG', share=G_opt, **G_opt_args)

    # Freeze layers
    G_args.freeze_layers = list(G_args.get("freeze_layers", []))

    def freeze_vars(gen, verbose=True):
        assert len(G_args.freeze_layers) > 0
        for name in list(gen.trainables.keys()):
            if any(layer in name for layer in G_args.freeze_layers):
                del gen.trainables[name]
                if verbose: print(f"Freezed {name}")

    # Build training graph for each GPU.
    data_fetch_ops = []
    loss_ops = []
    for gpu in range(num_gpus):
        with tf.name_scope('GPU%d' % gpu), tf.device('/gpu:%d' % gpu):

            # Create GPU-specific shadow copies of G and D.
            G_gpu = G if gpu == 0 else G.clone(G.name + '_shadow')
            if G_args.freeze_layers: freeze_vars(G_gpu, verbose=False)

            # Fetch training data via temporary variables.
            with tf.name_scope('DataFetch'):
                sched = training_schedule(cur_nimg=int(resume_kimg * 1000),
                                          training_set=training_set,
                                          **sched_args)
                reals_var = tf.Variable(
                    name='reals',
                    trainable=False,
                    initial_value=tf.zeros([sched.minibatch_gpu] +
                                           training_set.shape))
                labels_var = tf.Variable(name='labels',
                                         trainable=False,
                                         initial_value=tf.zeros([
                                             sched.minibatch_gpu,
                                             training_set.label_size
                                         ]))
                reals_write, labels_write = training_set.get_minibatch_tf()
                reals_write, labels_write = process_reals(
                    reals_write, labels_write, lod_in, mirror_augment,
                    training_set.dynamic_range, drange_net)
                reals_write = tf.concat(
                    [reals_write, reals_var[minibatch_gpu_in:]], axis=0)
                labels_write = tf.concat(
                    [labels_write, labels_var[minibatch_gpu_in:]], axis=0)
                data_fetch_ops += [tf.assign(reals_var, reals_write)]
                data_fetch_ops += [tf.assign(labels_var, labels_write)]
                reals_read = reals_var[:minibatch_gpu_in]
                labels_read = labels_var[:minibatch_gpu_in]

            # Evaluate loss functions.
            lod_assign_ops = []
            if 'lod' in G_gpu.vars:
                lod_assign_ops += [tf.assign(G_gpu.vars['lod'], lod_in)]
            with tf.control_dependencies(lod_assign_ops):
                with tf.name_scope('G_loss'):
                    G_loss, G_reg = dnnlib.util.call_func_by_name(
                        G=G_gpu,
                        D=None,
                        opt=G_opt,
                        training_set=training_set,
                        minibatch_size=minibatch_gpu_in,
                        reals=reals_read,
                        latents=labels_read,
                        **G_loss_args)
                    loss_ops.append(G_loss)

            # Register gradients.
            if not lazy_regularization:
                if G_reg is not None: G_loss += G_reg
            else:
                if G_reg is not None:
                    G_reg_opt.register_gradients(
                        tf.reduce_mean(G_reg * G_reg_interval),
                        G_gpu.trainables)
            G_opt.register_gradients(tf.reduce_mean(G_loss), G_gpu.trainables)

    # Setup training ops.
    data_fetch_op = tf.group(*data_fetch_ops)
    loss_op = tf.reduce_mean(tf.concat(loss_ops, axis=0))
    G_train_op = G_opt.apply_updates()
    G_reg_op = G_reg_opt.apply_updates(allow_no_op=True)
    Gs_update_op = Gs.setup_as_moving_average_of(G, beta=Gs_beta)

    # Finalize graph.
    with tf.device('/gpu:0'):
        try:
            peak_gpu_mem_op = tf.contrib.memory_stats.MaxBytesInUse()
        except tf.errors.NotFoundError:
            peak_gpu_mem_op = tf.constant(0)
    tflib.init_uninitialized_vars()

    print('Initializing logs...')
    summary_log = tf.summary.FileWriter(dnnlib.make_run_dir_path())
    if save_tf_graph:
        summary_log.add_graph(tf.get_default_graph())
    if save_weight_histograms:
        G.setup_weight_histograms()
    metrics = metric_base.MetricGroup(metric_arg_list)

    print('Training for %d kimg...\n' % total_kimg)
    dnnlib.RunContext.get().update('',
                                   cur_epoch=resume_kimg,
                                   max_epoch=total_kimg)
    maintenance_time = dnnlib.RunContext.get().get_last_update_interval()
    cur_nimg = int(resume_kimg * 1000)
    cur_tick = -1
    tick_start_nimg = cur_nimg
    prev_lod = -1.0
    running_mb_counter = 0
    loss_per_batch_sum = 0
    while cur_nimg < total_kimg * 1000:
        if dnnlib.RunContext.get().should_stop(): break

        # Choose training parameters and configure training ops.
        sched = training_schedule(cur_nimg=cur_nimg,
                                  training_set=training_set,
                                  **sched_args)
        assert sched.minibatch_size % (sched.minibatch_gpu * num_gpus) == 0
        training_set.configure(sched.minibatch_gpu, sched.lod)
        if reset_opt_for_new_lod:
            if np.floor(sched.lod) != np.floor(prev_lod) or np.ceil(
                    sched.lod) != np.ceil(prev_lod):
                G_opt.reset_optimizer_state()
        prev_lod = sched.lod

        # Run training ops.
        feed_dict = {
            lod_in: sched.lod,
            lrate_in: sched.G_lrate,
            minibatch_size_in: sched.minibatch_size,
            minibatch_gpu_in: sched.minibatch_gpu
        }
        tflib.run(data_fetch_op, feed_dict)
        ### TEST
        # fakes = G.get_output_for(labels_read, training_set.get_random_labels_tf(minibatch_gpu_in), is_training=True) # this is without activation in ~[-1.5, 1.5]
        # fakes = tf.clip_by_value(fakes, drange_net[0], drange_net[1])
        # reals = reals_read
        ### TEST
        for _repeat in range(minibatch_repeats):
            rounds = range(0, sched.minibatch_size,
                           sched.minibatch_gpu * num_gpus)
            run_G_reg = (lazy_regularization
                         and running_mb_counter % G_reg_interval == 0)
            cur_nimg += sched.minibatch_size
            running_mb_counter += 1

            # Fast path without gradient accumulation.
            if len(rounds) == 1:
                loss, _ = tflib.run([loss_op, G_train_op], feed_dict)
                # (loss, reals, fakes), _ = tflib.run([loss_op, G_train_op], feed_dict)
                tflib.run([data_fetch_op], feed_dict)
                # print(f"loss_tf  {np.mean(loss)}")
                # print(f"loss_np  {np.mean(np.square(reals - fakes))}")
                # print(f"loss_abs {np.mean(np.abs(reals - fakes))}")

                loss_per_batch_sum += loss
                #### TEST ####
                # if cur_nimg == sched.minibatch_size or cur_nimg % 2048 == 0:
                #     from PIL import Image
                #     reals = np.transpose(reals, [0, 2, 3, 1])
                #     fakes = np.transpose(fakes, [0, 2, 3, 1])
                #     diff = np.abs(reals - fakes)
                #     print(diff.min(), diff.max())
                #     for idx, (fake, real) in enumerate(zip(fakes, reals)):
                #         fake -= fake.min()
                #         fake /= fake.max()
                #         fake *= 255
                #         fake = fake.astype(np.uint8)
                #         Image.fromarray(fake, 'RGB').save(f"fake_loss_{idx}.png")
                #         real -= real.min()
                #         real /= real.max()
                #         real *= 255
                #         real = real.astype(np.uint8)
                #         Image.fromarray(real, 'RGB').save(f"real_loss_{idx}.png")
                ####
                if run_G_reg:
                    tflib.run(G_reg_op, feed_dict)
                tflib.run([Gs_update_op], feed_dict)

            # Slow path with gradient accumulation. FIXME: Probably wrong
            else:
                for _round in rounds:
                    loss, _, _ = tflib.run(
                        [loss_op, G_train_op, data_fetch_op], feed_dict)
                    loss_per_batch_sum += loss / len(rounds)
                    if run_G_reg:
                        tflib.run(G_reg_op, feed_dict)
                tflib.run(Gs_update_op, feed_dict)

        # Perform maintenance tasks once per tick.
        done = (cur_nimg >= total_kimg * 1000)
        if cur_tick < 0 or cur_nimg >= tick_start_nimg + sched.tick_kimg * 1000 or done:
            cur_tick += 1
            tick_kimg = (cur_nimg - tick_start_nimg) / 1000.0
            tick_start_nimg = cur_nimg
            tick_time = dnnlib.RunContext.get().get_time_since_last_update()
            total_time = dnnlib.RunContext.get().get_time_since_start(
            ) + resume_time
            tick_loss = loss_per_batch_sum * sched.minibatch_size / (
                tick_kimg * 1000)
            loss_per_batch_sum = 0

            # Report progress.
            print(
                'tick %-5d kimg %-8.1f lod %-5.2f minibatch %-4d loss/px %-12.8f time %-12s sec/tick %-7.1f sec/kimg %-7.2f maintenance %-6.1f gpumem %.1f'
                % (autosummary('Progress/tick', cur_tick),
                   autosummary('Progress/kimg', cur_nimg / 1000.0),
                   autosummary('Progress/lod', sched.lod),
                   autosummary('Progress/minibatch', sched.minibatch_size),
                   autosummary('Progress/loss_per_px', tick_loss),
                   dnnlib.util.format_time(
                       autosummary('Timing/total_sec', total_time)),
                   autosummary('Timing/sec_per_tick', tick_time),
                   autosummary('Timing/sec_per_kimg', tick_time / tick_kimg),
                   autosummary('Timing/maintenance_sec', maintenance_time),
                   autosummary('Resources/peak_gpu_mem_gb',
                               peak_gpu_mem_op.eval() / 2**30)))
            autosummary('Timing/total_hours', total_time / (60.0 * 60.0))
            autosummary('Timing/total_days', total_time / (24.0 * 60.0 * 60.0))

            # Save snapshots.
            if image_snapshot_ticks is not None and (
                    cur_tick % image_snapshot_ticks == 0 or done):
                grid_fakes = Gs.run(grid_latents,
                                    grid_labels,
                                    is_validation=True,
                                    minibatch_size=sched.minibatch_gpu)
                misc.save_image_grid(grid_fakes,
                                     dnnlib.make_run_dir_path(
                                         'fakes%06d.png' % (cur_nimg // 1000)),
                                     drange=drange_net,
                                     grid_size=grid_size)
            if network_snapshot_ticks is not None and (
                    cur_tick % network_snapshot_ticks == 0 or done):
                pkl = dnnlib.make_run_dir_path('network-snapshot-%06d.pkl' %
                                               (cur_nimg // 1000))
                misc.save_pkl((G, None, Gs), pkl)
                metrics.run(pkl,
                            run_dir=dnnlib.make_run_dir_path(),
                            data_dir=dnnlib.convert_path(data_dir),
                            num_gpus=num_gpus,
                            tf_config=tf_config)

            # Update summaries and RunContext.
            metrics.update_autosummaries()
            tflib.autosummary.save_summaries(summary_log, cur_nimg)
            dnnlib.RunContext.get().update('%.2f' % sched.lod,
                                           cur_epoch=cur_nimg // 1000,
                                           max_epoch=total_kimg)
            maintenance_time = dnnlib.RunContext.get(
            ).get_last_update_interval() - tick_time

    # Save final snapshot.
    misc.save_pkl((G, None, Gs), dnnlib.make_run_dir_path('network-final.pkl'))

    # All done.
    summary_log.close()
    training_set.close()
Ejemplo n.º 29
0
def training_loop(
    G_args                  = {},       # Options for generator network.
    D_args                  = {},       # Options for discriminator network.
    G_opt_args              = {},       # Options for generator optimizer.
    D_opt_args              = {},       # Options for discriminator optimizer.
    G_loss_args             = {},       # Options for generator loss.
    D_loss_args             = {},       # Options for discriminator loss.
    dataset_args            = {},       # Options for dataset.load_dataset().
    sched_args              = {},       # Options for train.TrainingSchedule.
    grid_args               = {},       # Options for train.setup_snapshot_image_grid().
    setname                 = None,   # Model name 
    tf_config               = {},       # Options for tflib.init_tf().
    G_smoothing_kimg        = 10.0,     # Half-life of the running average of generator weights.
    minibatch_repeats       = 4,        # Number of minibatches to run before adjusting training parameters.
    lazy_regularization     = True,     # Perform regularization as a separate training step?
    G_reg_interval          = 4,        # How often the perform regularization for G? Ignored if lazy_regularization=False.
    D_reg_interval          = 16,       # How often the perform regularization for D? Ignored if lazy_regularization=False.
    reset_opt_for_new_lod   = True,     # Reset optimizer internal state (e.g. Adam moments) when new layers are introduced?
    total_kimg              = 25000,    # Total length of the training, measured in thousands of real images.
    mirror_augment          = False,    # Enable mirror augment?
    mirror_augment_v        = False,  # Enable mirror augment vertically?
    drange_net              = [-1,1],   # Dynamic range used when feeding image data to the networks.
    image_snapshot_ticks    = 50,       # How often to save image snapshots? None = only save 'reals.png' and 'fakes-init.png'.
    network_snapshot_ticks  = 50,       # How often to save network snapshots? None = only save 'networks-final.pkl'.
    save_tf_graph           = False,    # Include full TensorFlow computation graph in the tfevents file?
    save_weight_histograms  = False,    # Include weight histograms in the tfevents file?
    resume_pkl              = 'latest',     # Network pickle to resume training from, None = train from scratch.
    resume_kimg             = 0.0,      # Assumed training progress at the beginning. Affects reporting and training schedule.
    resume_time             = 0.0,      # Assumed wallclock time at the beginning. Affects reporting.
    restore_partial_fn      = None,   # Filename of network for partial restore
    resume_with_new_nets    = False):   # Construct new networks according to G_args and D_args before resuming training?

    # Initialize dnnlib and TensorFlow.
    tflib.init_tf(tf_config)
    num_gpus = dnnlib.submit_config.num_gpus

    # Load training set.
    training_set = dataset.load_dataset(verbose=True, **dataset_args)
    # custom resolution - for saved model name below
    resolution = training_set.resolution
    if training_set.init_res != [4,4]:
        init_res_str = '-%dx%d' % (training_set.init_res[0], training_set.init_res[1])
    else:
        init_res_str = ''
    ext = 'png' if training_set.shape[0] == 4 else 'jpg'
    print(' model base resolution', resolution)
    
    grid_size, grid_reals, grid_labels = misc.setup_snapshot_image_grid(training_set, **grid_args)
    misc.save_image_grid(grid_reals, dnnlib.make_run_dir_path('_reals.%s'%ext), drange=training_set.dynamic_range, grid_size=grid_size)

    # Construct or load networks.
    with tf.device('/gpu:0'):
        if resume_pkl is None or resume_with_new_nets:
            print(' Constructing networks...')
            G = tflib.Network('G', num_channels=training_set.shape[0], resolution=resolution, label_size=training_set.label_size, **G_args)
            D = tflib.Network('D', num_channels=training_set.shape[0], resolution=resolution, label_size=training_set.label_size, **D_args)
            Gs = G.clone('Gs')
        if resume_pkl is not None:
            if resume_pkl == 'latest':
                resume_pkl, resume_kimg = misc.locate_latest_pkl(dnnlib.submit_config.run_dir_root)
            elif resume_pkl == 'restore_partial':
                print(' Restore partially...')
                # Initialize networks
                G = tflib.Network('G', num_channels=training_set.shape[0], resolution=resolution, label_size=training_set.label_size, **G_args)
                D = tflib.Network('D', num_channels=training_set.shape[0], resolution=resolution, label_size=training_set.label_size, **D_args)
                Gs = G.clone('Gs')
                # Load pre-trained networks
                assert restore_partial_fn != None
                G_partial, D_partial, Gs_partial = pickle.load(open(restore_partial_fn, 'rb'))
                # Restore (subset of) pre-trained weights (only parameters that match both name and shape)
                G.copy_compatible_trainables_from(G_partial)
                D.copy_compatible_trainables_from(D_partial)
                Gs.copy_compatible_trainables_from(Gs_partial)
            else:
                if resume_pkl is not None and resume_kimg == 0:
                    resume_pkl, resume_kimg = misc.locate_latest_pkl(resume_pkl)
                print(' Loading networks from "%s", kimg %.3g' % (resume_pkl, resume_kimg))
                rG, rD, rGs = misc.load_pkl(resume_pkl)
                if resume_with_new_nets:
                    G.copy_vars_from(rG)
                    D.copy_vars_from(rD)
                    Gs.copy_vars_from(rGs)
                else:
                    G, D, Gs = rG, rD, rGs
                
    # Print layers if needed and generate initial image snapshot
    # G.print_layers(); D.print_layers()
    sched = training_schedule(cur_nimg=total_kimg*1000, training_set=training_set, **sched_args)
    grid_latents = np.random.randn(np.prod(grid_size), *G.input_shape[1:])
    grid_fakes = Gs.run(grid_latents, grid_labels, is_validation=True, minibatch_size=sched.minibatch_gpu)
    misc.save_image_grid(grid_fakes, dnnlib.make_run_dir_path('fakes_init.%s'%ext), drange=drange_net, grid_size=grid_size)

    # Setup training inputs.
    print(' Building TensorFlow graph...')
    with tf.name_scope('Inputs'), tf.device('/cpu:0'):
        lod_in               = tf.placeholder(tf.float32, name='lod_in', shape=[])
        lrate_in             = tf.placeholder(tf.float32, name='lrate_in', shape=[])
        minibatch_size_in    = tf.placeholder(tf.int32, name='minibatch_size_in', shape=[])
        minibatch_gpu_in     = tf.placeholder(tf.int32, name='minibatch_gpu_in', shape=[])
        minibatch_multiplier = minibatch_size_in // (minibatch_gpu_in * num_gpus)
        Gs_beta              = 0.5 ** tf.div(tf.cast(minibatch_size_in, tf.float32), G_smoothing_kimg * 1000.0) if G_smoothing_kimg > 0.0 else 0.0

    # Setup optimizers.
    G_opt_args = dict(G_opt_args)
    D_opt_args = dict(D_opt_args)
    for args, reg_interval in [(G_opt_args, G_reg_interval), (D_opt_args, D_reg_interval)]:
        args['minibatch_multiplier'] = minibatch_multiplier
        args['learning_rate'] = lrate_in
        if lazy_regularization:
            mb_ratio = reg_interval / (reg_interval + 1)
            args['learning_rate'] *= mb_ratio
            if 'beta1' in args: args['beta1'] **= mb_ratio
            if 'beta2' in args: args['beta2'] **= mb_ratio
    G_opt = tflib.Optimizer(name='TrainG', **G_opt_args)
    D_opt = tflib.Optimizer(name='TrainD', **D_opt_args)
    G_reg_opt = tflib.Optimizer(name='RegG', share=G_opt, **G_opt_args)
    D_reg_opt = tflib.Optimizer(name='RegD', share=D_opt, **D_opt_args)

    # Build training graph for each GPU.
    data_fetch_ops = []
    for gpu in range(num_gpus):
        with tf.name_scope('GPU%d' % gpu), tf.device('/gpu:%d' % gpu):

            # Create GPU-specific shadow copies of G and D.
            G_gpu = G if gpu == 0 else G.clone(G.name + '_shadow')
            D_gpu = D if gpu == 0 else D.clone(D.name + '_shadow')

            # Fetch training data via temporary variables.
            with tf.name_scope('DataFetch'):
                sched = training_schedule(cur_nimg=int(resume_kimg*1000), training_set=training_set, **sched_args)
                reals_var = tf.Variable(name='reals', trainable=False, initial_value=tf.zeros([sched.minibatch_gpu] + training_set.shape))
                labels_var = tf.Variable(name='labels', trainable=False, initial_value=tf.zeros([sched.minibatch_gpu, training_set.label_size]))
                reals_write, labels_write = training_set.get_minibatch_tf()
                reals_write, labels_write = process_reals(reals_write, labels_write, lod_in, mirror_augment, mirror_augment_v, training_set.dynamic_range, drange_net)
                reals_write = tf.concat([reals_write, reals_var[minibatch_gpu_in:]], axis=0)
                labels_write = tf.concat([labels_write, labels_var[minibatch_gpu_in:]], axis=0)
                data_fetch_ops += [tf.assign(reals_var, reals_write)]
                data_fetch_ops += [tf.assign(labels_var, labels_write)]
                reals_read = reals_var[:minibatch_gpu_in]
                labels_read = labels_var[:minibatch_gpu_in]

            # Evaluate loss functions.
            lod_assign_ops = []
            if 'lod' in G_gpu.vars: lod_assign_ops += [tf.assign(G_gpu.vars['lod'], lod_in)]
            if 'lod' in D_gpu.vars: lod_assign_ops += [tf.assign(D_gpu.vars['lod'], lod_in)]
            with tf.control_dependencies(lod_assign_ops):
                with tf.name_scope('G_loss'):
                    G_loss, G_reg = dnnlib.util.call_func_by_name(G=G_gpu, D=D_gpu, opt=G_opt, training_set=training_set, minibatch_size=minibatch_gpu_in, **G_loss_args)
                with tf.name_scope('D_loss'):
                    D_loss, D_reg = dnnlib.util.call_func_by_name(G=G_gpu, D=D_gpu, opt=D_opt, training_set=training_set, minibatch_size=minibatch_gpu_in, reals=reals_read, labels=labels_read, **D_loss_args)

            # Register gradients.
            if not lazy_regularization:
                if G_reg is not None: G_loss += G_reg
                if D_reg is not None: D_loss += D_reg
            else:
                if G_reg is not None: G_reg_opt.register_gradients(tf.reduce_mean(G_reg * G_reg_interval), G_gpu.trainables)
                if D_reg is not None: D_reg_opt.register_gradients(tf.reduce_mean(D_reg * D_reg_interval), D_gpu.trainables)
            G_opt.register_gradients(tf.reduce_mean(G_loss), G_gpu.trainables)
            D_opt.register_gradients(tf.reduce_mean(D_loss), D_gpu.trainables)

    # Setup training ops.
    data_fetch_op = tf.group(*data_fetch_ops)
    G_train_op = G_opt.apply_updates()
    D_train_op = D_opt.apply_updates()
    G_reg_op = G_reg_opt.apply_updates(allow_no_op=True)
    D_reg_op = D_reg_opt.apply_updates(allow_no_op=True)
    Gs_update_op = Gs.setup_as_moving_average_of(G, beta=Gs_beta)

    # Finalize graph.
    with tf.device('/gpu:0'):
        try:
            peak_gpu_mem_op = tf.contrib.memory_stats.MaxBytesInUse()
        except tf.errors.NotFoundError:
            peak_gpu_mem_op = tf.constant(0)
    tflib.init_uninitialized_vars()

    # print('Initializing logs...')
    summary_log = tf.summary.FileWriter(dnnlib.make_run_dir_path())
    if save_tf_graph:
        summary_log.add_graph(tf.get_default_graph())
    if save_weight_histograms:
        G.setup_weight_histograms(); D.setup_weight_histograms()

    print(' Training for %d kimg (%d left) \n' % (total_kimg, total_kimg-resume_kimg))
    dnnlib.RunContext.get().update('', cur_epoch=resume_kimg, max_epoch=total_kimg)
    maintenance_time = dnnlib.RunContext.get().get_last_update_interval()
    cur_nimg = int(resume_kimg * 1000)
    cur_tick = -1
    tick_start_nimg = cur_nimg
    prev_lod = -1.0
    running_mb_counter = 0
    while cur_nimg < total_kimg * 1000:
        if dnnlib.RunContext.get().should_stop(): break

        # Choose training parameters and configure training ops.
        sched = training_schedule(cur_nimg=cur_nimg, training_set=training_set, **sched_args)
        assert sched.minibatch_size % (sched.minibatch_gpu * num_gpus) == 0
        training_set.configure(sched.minibatch_gpu) # , sched.lod
        if reset_opt_for_new_lod:
            if np.floor(sched.lod) != np.floor(prev_lod) or np.ceil(sched.lod) != np.ceil(prev_lod):
                G_opt.reset_optimizer_state(); D_opt.reset_optimizer_state()
        prev_lod = sched.lod

        # Run training ops.
        feed_dict = {lod_in: sched.lod, lrate_in: sched.G_lrate, minibatch_size_in: sched.minibatch_size, minibatch_gpu_in: sched.minibatch_gpu}
        for _repeat in range(minibatch_repeats):
            rounds = range(0, sched.minibatch_size, sched.minibatch_gpu * num_gpus)
            run_G_reg = (lazy_regularization and running_mb_counter % G_reg_interval == 0)
            run_D_reg = (lazy_regularization and running_mb_counter % D_reg_interval == 0)
            cur_nimg += sched.minibatch_size
            running_mb_counter += 1

            # Fast path without gradient accumulation.
            if len(rounds) == 1:
                tflib.run([G_train_op, data_fetch_op], feed_dict)
                if run_G_reg:
                    tflib.run(G_reg_op, feed_dict)
                tflib.run([D_train_op, Gs_update_op], feed_dict)
                if run_D_reg:
                    tflib.run(D_reg_op, feed_dict)

            # Slow path with gradient accumulation.
            else:
                for _round in rounds:
                    tflib.run(G_train_op, feed_dict)
                if run_G_reg:
                    for _round in rounds:
                        tflib.run(G_reg_op, feed_dict)
                tflib.run(Gs_update_op, feed_dict)
                for _round in rounds:
                    tflib.run(data_fetch_op, feed_dict)
                    tflib.run(D_train_op, feed_dict)
                if run_D_reg:
                    for _round in rounds:
                        tflib.run(D_reg_op, feed_dict)

        # Perform maintenance tasks once per tick.
        done = (cur_nimg >= total_kimg * 1000)
        if cur_tick < 0 or cur_nimg >= tick_start_nimg + sched.tick_kimg * 1000 or done:
            cur_tick += 1
            cur_time = time.time()
            tick_kimg = (cur_nimg - tick_start_nimg) / 1000.0
            tick_start_nimg = cur_nimg
            tick_time = dnnlib.RunContext.get().get_time_since_last_update()
            total_time = dnnlib.RunContext.get().get_time_since_start() + resume_time

            if sched.lod == 0:
                left_kimg = total_kimg - cur_nimg / 1000
                left_sec = left_kimg * tick_time / tick_kimg
                finaltime = time.asctime(time.localtime(cur_time + left_sec))
                msg_final = '%ss left till %s ' % (shortime(left_sec), finaltime[11:16])
            else:
                msg_final = ''

            # Report progress.
            # print('tick %-4d kimg %-6.1f lod %-5.2f minibch %-3d:%d time %-8s min/tick %-6.3g %s sec/kimg %-7.3g gpumem %-4.1f %d lr %.2g ' % (
            print('tick %-4d kimg %-6.1f time %-8s  %s min/tick %-6.3g sec/kimg %-7.3g gpumem %-4.1f lr %.2g ' % (
                autosummary('Progress/tick', cur_tick),
                autosummary('Progress/kimg', cur_nimg / 1000.0),
                # autosummary('Progress/lod', sched.lod),
                # autosummary('Progress/minibatch', sched.minibatch_size),
                # autosummary('Progress/minibatch_gpu', sched.minibatch_gpu),
                dnnlib.util.format_time(autosummary('Timing/total_sec', total_time)),
                msg_final,
                autosummary('Timing/min_per_tick', tick_time / 60),
                autosummary('Timing/sec_per_kimg', tick_time / tick_kimg),
                # autosummary('Timing/maintenance_sec', maintenance_time),
                autosummary('Resources/peak_gpu_mem_gb', peak_gpu_mem_op.eval() / 2**30),
                sched.G_lrate))
            autosummary('Timing/total_hours', total_time / (60.0 * 60.0))
            autosummary('Timing/total_days', total_time / (24.0 * 60.0 * 60.0))

            # Save snapshots.
            if image_snapshot_ticks is not None and (cur_tick % image_snapshot_ticks == 0 or done):
                grid_fakes = Gs.run(grid_latents, grid_labels, is_validation=True, minibatch_size=sched.minibatch_gpu)
                misc.save_image_grid(grid_fakes, dnnlib.make_run_dir_path('fake-%04d.%s' % (cur_nimg // 1000, ext)), drange=drange_net, grid_size=grid_size)
            if network_snapshot_ticks is not None and (cur_tick % network_snapshot_ticks == 0 or done):
                pkl = dnnlib.make_run_dir_path('snapshot-%d-%s%s-%04d.pkl' % (resolution, setname[-1], init_res_str, cur_nimg // 1000))
                misc.save_pkl((G, D, Gs), pkl)
                misc.save_pkl((Gs), dnnlib.make_run_dir_path('%s-%d-%s%s-%04d.pkl' % (setname[:-1], resolution, setname[-1], init_res_str, cur_nimg // 1000)))

            # Update summaries and RunContext.
            tflib.autosummary.save_summaries(summary_log, cur_nimg)
            dnnlib.RunContext.get().update('%.2f' % sched.lod, cur_epoch=cur_nimg // 1000, max_epoch=total_kimg)
            maintenance_time = dnnlib.RunContext.get().get_last_update_interval() - tick_time

    # Save final snapshot.
    misc.save_pkl((G, D, Gs), dnnlib.make_run_dir_path('snapshot-%d-%s%s-final.pkl' % (resolution, setname[-1], init_res_str)))
    misc.save_pkl((Gs), dnnlib.make_run_dir_path('%s-%d-%s%s-final.pkl' % (setname[:-1], resolution, setname[-1], init_res_str)))

    # All done.
    summary_log.close()
    training_set.close()
Ejemplo n.º 30
0
def training_loop(
    G_args                  = {},       # Options for generator network.
    D_args                  = {},       # Options for discriminator network.
    G_opt_args              = {},       # Options for generator optimizer.
    D_opt_args              = {},       # Options for discriminator optimizer.
    G_loss_args             = {},       # Options for generator loss.
    D_loss_args             = {},       # Options for discriminator loss.
    dataset_args            = {},       # Options for dataset.load_dataset().
    sched_args              = {},       # Options for train.TrainingSchedule.
    grid_args               = {},       # Options for train.setup_snapshot_image_grid().
    metric_arg_list         = [],       # Options for MetricGroup.
    tf_config               = {},       # Options for tflib.init_tf().
    data_dir                = None,     # Directory to load datasets from.
    G_smoothing_kimg        = 10.0,     # Half-life of the running average of generator weights.
    minibatch_repeats       = 4,        # Number of minibatches to run before adjusting training parameters.
    lazy_regularization     = True,     # Perform regularization as a separate training step?
    G_reg_interval          = 4,        # How often the perform regularization for G? Ignored if lazy_regularization=False.
    D_reg_interval          = 16,       # How often the perform regularization for D? Ignored if lazy_regularization=False.
    reset_opt_for_new_lod   = True,     # Reset optimizer internal state (e.g. Adam moments) when new layers are introduced?
    total_kimg              = 25000,    # Total length of the training, measured in thousands of real images.
    mirror_augment          = False,    # Enable mirror augment?
    drange_net              = [0,1],   # Dynamic range used when feeding image data to the networks.
    image_snapshot_ticks    = 50,       # How often to save image snapshots? None = only save 'reals.png' and 'fakes-init.png'.
    network_snapshot_ticks  = 50,       # How often to save network snapshots? None = only save 'networks-final.pkl'.
    save_tf_graph           = False,    # Include full TensorFlow computation graph in the tfevents file?
    save_weight_histograms  = False,    # Include weight histograms in the tfevents file?
    resume_pkl              = None,     # Network pickle to resume training from, None = train from scratch.
    resume_kimg             = 0.0,      # Assumed training progress at the beginning. Affects reporting and training schedule.
    resume_time             = 0.0,      # Assumed wallclock time at the beginning. Affects reporting.
    resume_with_new_nets    = False):   # Construct new networks according to G_args and D_args before resuming training?

    # Initialize dnnlib and TensorFlow.
    tflib.init_tf(tf_config)
    num_gpus = dnnlib.submit_config.num_gpus

    # Load training set.
    training_set = dataset.load_dataset(data_dir=dnnlib.convert_path(data_dir), verbose=True, **dataset_args)
    grid_size, grid_reals, grid_labels = misc.setup_snapshot_image_grid(training_set, **grid_args)
    misc.save_image_grid(grid_reals, dnnlib.make_run_dir_path('reals.png'), drange=training_set.dynamic_range, grid_size=grid_size)
    # Construct or load networks.
    training_set.configure(minibatch_size=sched_args.batch_size)
    with tf.device('/gpu:0'):
        if resume_pkl is None or resume_with_new_nets:
            print('Constructing networks...')
            G = tflib.Network('G', num_channels=training_set.shape[0], resolution=training_set.shape[1], label_size=training_set.label_size, **G_args)
            D = tflib.Network('D', num_channels=training_set.shape[0], resolution=training_set.shape[1], label_size=training_set.label_size, **D_args)
            Gs = G.clone('Gs')
            start = 0
        if resume_pkl is not None:
            print('Loading networks from "%s"...' % resume_pkl)
            rG, rD, rGs = misc.load_pkl(resume_pkl)
            if resume_with_new_nets: G.copy_vars_from(rG); D.copy_vars_from(rD); Gs.copy_vars_from(rGs)
            else: G = rG; D = rD; Gs = rGs
            start = int(resume_pkl.split('-')[-1].split('.')[0]) // sched_args.batch_size

    # Print layers and generate initial image snapshot.
    G.print_layers(); D.print_layers()
    grid_latents = np.random.randn(np.prod(grid_size), *G.input_shape[1:])
    grid_fakes = G.run(grid_latents, grid_labels, is_validation=True, minibatch_size=sched_args.batch_size)
    misc.save_image_grid(grid_fakes, dnnlib.make_run_dir_path('fakes_init.png'), drange=drange_net, grid_size=grid_size)

    global_step = tf.Variable(start, trainable=False, name='learning_rate_step')
    learning_rate = tf.train.exponential_decay(sched_args.lr, global_step, sched_args.decay_step,
                                               sched_args.decay_rate, staircase=sched_args.stair)
    add_global = global_step.assign_add(1)
    D_opt = tflib.Optimizer(name='TrainD', learning_rate=learning_rate, **D_opt_args)
    G_opt = tflib.Optimizer(name='TrainG', learning_rate=learning_rate, **G_opt_args)

    for gpu in range(num_gpus):
        print('build graph on gpu %s' % str(gpu))
        with tf.name_scope('GPU%d' % gpu), tf.device('/gpu:%d' % gpu):
            # Create GPU-specific shadow copies of G and D.
            G_gpu = G if gpu == 0 else G.clone(G.name + '_shadow')
            D_gpu = D if gpu == 0 else D.clone(D.name + '_shadow')

            with tf.name_scope('DataFetch'):
                reals_read, labels_read = training_set.get_minibatch_tf()

                reals_read, labels_read = process_reals(reals_read, labels_read, mirror_augment,
                                                        training_set.dynamic_range, drange_net)

            with tf.name_scope('Loss'), tf.control_dependencies(None):
                loss, reg = dnnlib.util.call_func_by_name(G=G_gpu, D=D_gpu, opt=D_opt, training_set=training_set,
                                                          minibatch_size=sched_args.batch_size, reals=reals_read,
                                                          labels=labels_read, **D_loss_args)
            with tf.control_dependencies([add_global]):
                G_opt.register_gradients(loss, G_gpu.trainables)
                D_opt.register_gradients(loss, D_gpu.trainables)

    G_train_op = G_opt.apply_updates()
    D_train_op = D_opt.apply_updates()

    # Finalize graph.
    with tf.device('/gpu:0'):
        try:
            peak_gpu_mem_op = tf.contrib.memory_stats.MaxBytesInUse()
        except tf.errors.NotFoundError:
            peak_gpu_mem_op = tf.constant(0)
    tflib.init_uninitialized_vars()

    print('Initializing logs...')
    summary_log = tf.summary.FileWriter(dnnlib.make_run_dir_path())
    if save_tf_graph:
        summary_log.add_graph(tf.get_default_graph())
    if save_weight_histograms:
        G.setup_weight_histograms(); D.setup_weight_histograms()
    metrics = metric_base.MetricGroup(metric_arg_list)

    print('Training for %d kimg...\n' % total_kimg)
    dnnlib.RunContext.get().update('', cur_epoch=resume_kimg, max_epoch=total_kimg)
    maintenance_time = dnnlib.RunContext.get().get_last_update_interval()
    cur_nimg = int(resume_kimg * 1000)
    cur_tick = -1
    tick_start_nimg = cur_nimg
    prev_lod = -1.0
    running_mb_counter = 0
    while cur_nimg < total_kimg * 1000:
        if dnnlib.RunContext.get().should_stop(): break

        loss_, _, _, lr_ = tflib.run([loss, G_train_op, D_train_op, learning_rate])
        cur_nimg += sched_args.batch_size * num_gpus
        done = (cur_nimg >= total_kimg * 1000)
        if cur_tick < 0 or cur_nimg >= tick_start_nimg + sched_args.tick_kimg * 1000 or done:
            cur_tick += 1
            tick_kimg = (cur_nimg - tick_start_nimg) / 1000.0
            tick_start_nimg = cur_nimg
            tick_time = dnnlib.RunContext.get().get_time_since_last_update()
            total_time = dnnlib.RunContext.get().get_time_since_start() + resume_time

            # Report progress.
            print(
                'tick %-5d kimg %-8.1f minibatch %-4d time %-12s sec/tick %-7.1f '
                'sec/kimg %-7.2f maintenance %-6.1f gpumem %.1f loss %-8.1f lr %-2.5f' % (
                    autosummary('Progress/tick', cur_tick),
                    autosummary('Progress/kimg', cur_nimg / 1000.0),
                    autosummary('Progress/minibatch', sched_args.batch_size),
                    dnnlib.util.format_time(autosummary('Timing/total_sec', total_time)),
                    autosummary('Timing/sec_per_tick', tick_time),
                    autosummary('Timing/sec_per_kimg', tick_time / tick_kimg),
                    autosummary('Timing/maintenance_sec', maintenance_time),
                    autosummary('Resources/peak_gpu_mem_gb', peak_gpu_mem_op.eval() / 2 ** 30),
                    autosummary('loss', loss_),
                    autosummary('lr', lr_)))

            autosummary('Timing/total_hours', total_time / (60.0 * 60.0))
            autosummary('Timing/total_days', total_time / (24.0 * 60.0 * 60.0))

            # Save snapshots.
            if image_snapshot_ticks is not None and (cur_tick % image_snapshot_ticks == 0 or done):
                grid_fakes = G.run(grid_latents, grid_labels, is_validation=True, minibatch_size=sched_args.batch_size)
                misc.save_image_grid(grid_fakes, dnnlib.make_run_dir_path('fakes%06d.png' % (cur_nimg // 1000)),
                                     drange=drange_net, grid_size=grid_size)
            if network_snapshot_ticks is not None and (cur_tick % network_snapshot_ticks == 0 or done):
                pkl = dnnlib.make_run_dir_path('network-snapshot-%06d.pkl' % (cur_nimg // 1000))
                misc.save_pkl((G, D, Gs), pkl)
                metrics.run(pkl, run_dir=dnnlib.make_run_dir_path(), data_dir=dnnlib.convert_path(data_dir),
                            num_gpus=num_gpus, tf_config=tf_config)

            # Update summaries and RunContext.
            metrics.update_autosummaries()
            tflib.autosummary.save_summaries(summary_log, cur_nimg)
            dnnlib.RunContext.get().update('%.2f' % 0.0, cur_epoch=cur_nimg // 1000, max_epoch=total_kimg)
            maintenance_time = dnnlib.RunContext.get().get_last_update_interval() - tick_time

            # Save final snapshot.
    misc.save_pkl((G, D, Gs), dnnlib.make_run_dir_path('network-final.pkl'))

        # All done.
    summary_log.close()
    training_set.close()