def main():
    """Invert a list of real images into StyleGAN latent codes.

    Workflow: load an (Encoder, Generator) pair from ``args.model_path``,
    initialize per-batch latent codes ``wp`` (randomly or from the encoder),
    then optimize ``wp`` with Adam against a pixel + perceptual (+ optional
    encoder-domain) loss. Saves original/encoded/inverted images, the latent
    codes as ``.npy``, and an HTML visualization page.

    Reads all settings from ``parse_args()``; writes into ``output_dir``.
    """
    import shutil  # local import: used only for the final result copy below

    args = parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
    assert os.path.exists(args.image_list)
    image_list_name = os.path.splitext(os.path.basename(args.image_list))[0]
    output_dir = args.output_dir or f'results/inversion/{image_list_name}'
    logger = setup_logger(output_dir, 'inversion.log', 'inversion_logger')

    logger.info('Loading model.')
    tflib.init_tf({'rnd.np_random_seed': 1000})
    with open(args.model_path, 'rb') as f:
        # Pickle layout: (Encoder, _, _, Gs) — only E and Gs are used here.
        E, _, _, Gs = pickle.load(f)

    # Get input size (encoder input is NCHW, so H is index 2, W index 3).
    image_size = E.input_shape[2]
    assert image_size == E.input_shape[3]

    # Build graph.
    logger.info('Building graph.')
    sess = tf.get_default_session()
    input_shape = E.input_shape
    input_shape[0] = args.batch_size
    x = tf.placeholder(tf.float32, shape=input_shape, name='real_image')
    # Map [-1, 1] NCHW tensors to [0, 255] NHWC for the perceptual model.
    x_255 = (tf.transpose(x, [0, 2, 3, 1]) + 1) / 2 * 255
    latent_shape = Gs.components.synthesis.input_shape
    latent_shape[0] = args.batch_size
    # wp is the optimization variable: one W+ latent code per batch entry.
    wp = tf.get_variable(shape=latent_shape, name='latent_code')
    x_rec = Gs.components.synthesis.get_output_for(wp, randomize_noise=False)
    x_rec_255 = (tf.transpose(x_rec, [0, 2, 3, 1]) + 1) / 2 * 255
    if args.random_init:
        logger.info(' Use random initialization for optimization.')
        wp_rnd = tf.random.normal(shape=latent_shape, name='latent_code_init')
        setter = tf.assign(wp, wp_rnd)
    else:
        logger.info(
            ' Use encoder output as the initialization for optimization.')
        w_enc = E.get_output_for(x, is_training=False)
        wp_enc = tf.reshape(w_enc, latent_shape)
        setter = tf.assign(wp, wp_enc)

    # Settings for optimization.
    logger.info('Setting configuration for optimization.')
    perceptual_model = PerceptualModel([image_size, image_size], False)
    x_feat = perceptual_model(x_255)
    x_rec_feat = perceptual_model(x_rec_255)
    loss_feat = tf.reduce_mean(tf.square(x_feat - x_rec_feat), axis=[1])
    loss_pix = tf.reduce_mean(tf.square(x - x_rec), axis=[1, 2, 3])
    if args.domain_regularizer:
        # Domain regularizer: keep wp close to what the encoder would
        # produce for the current reconstruction (in-domain inversion).
        logger.info(' Involve encoder for optimization.')
        w_enc_new = E.get_output_for(x_rec, is_training=False)
        wp_enc_new = tf.reshape(w_enc_new, latent_shape)
        loss_enc = tf.reduce_mean(tf.square(wp - wp_enc_new), axis=[1, 2])
    else:
        logger.info(' Do NOT involve encoder for optimization.')
        loss_enc = 0
    loss = (loss_pix +
            args.loss_weight_feat * loss_feat +
            args.loss_weight_enc * loss_enc)
    optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    train_op = optimizer.minimize(loss, var_list=[wp])
    tflib.init_uninitialized_vars()

    # Load image list (one path per line).
    logger.info('Loading image list.')
    image_list = []
    with open(args.image_list, 'r') as f:
        for line in f:
            image_list.append(line.strip())

    # Invert images.
    logger.info('Start inversion.')
    # BUGFIX: clamp to >= 1 so that num_results > num_iterations no longer
    # raises ZeroDivisionError at the `step % save_interval` checks below.
    save_interval = max(args.num_iterations // args.num_results, 1)
    headers = ['Name', 'Original Image', 'Encoder Output']
    for step in range(1, args.num_iterations + 1):
        if step == args.num_iterations or step % save_interval == 0:
            headers.append(f'Step {step:06d}')
    viz_size = None if args.viz_size == 0 else args.viz_size
    visualizer = HtmlPageVisualizer(num_rows=len(image_list),
                                    num_cols=len(headers),
                                    viz_size=viz_size)
    visualizer.set_headers(headers)

    images = np.zeros(input_shape, np.uint8)
    names = ['' for _ in range(args.batch_size)]
    latent_codes_enc = []
    latent_codes = []
    for img_idx in tqdm(range(0, len(image_list), args.batch_size),
                        leave=False):
        # Load inputs for this batch (last batch may be short; stale slots
        # in `images` are ignored because results are sliced by len(batch)).
        batch = image_list[img_idx:img_idx + args.batch_size]
        for i, image_path in enumerate(batch):
            image = resize_image(load_image(image_path),
                                 (image_size, image_size))
            images[i] = np.transpose(image, [2, 0, 1])
            names[i] = os.path.splitext(os.path.basename(image_path))[0]
        inputs = images.astype(np.float32) / 255 * 2.0 - 1.0
        # Run encoder to initialize wp, then snapshot the encoder result.
        sess.run([setter], {x: inputs})
        outputs = sess.run([wp, x_rec])
        latent_codes_enc.append(outputs[0][0:len(batch)])
        outputs[1] = adjust_pixel_range(outputs[1])
        for i, _ in enumerate(batch):
            image = np.transpose(images[i], [1, 2, 0])
            save_image(f'{output_dir}/{names[i]}_ori.png', image)
            save_image(f'{output_dir}/{names[i]}_enc.png', outputs[1][i])
            visualizer.set_cell(i + img_idx, 0, text=names[i])
            visualizer.set_cell(i + img_idx, 1, image=image)
            visualizer.set_cell(i + img_idx, 2, image=outputs[1][i])
        # Optimize latent codes, saving intermediate reconstructions.
        col_idx = 3
        for step in tqdm(range(1, args.num_iterations + 1), leave=False):
            sess.run(train_op, {x: inputs})
            if step == args.num_iterations or step % save_interval == 0:
                outputs = sess.run([wp, x_rec])
                outputs[1] = adjust_pixel_range(outputs[1])
                for i, _ in enumerate(batch):
                    if step == args.num_iterations:
                        save_image(f'{output_dir}/{names[i]}_inv.png',
                                   outputs[1][i])
                    visualizer.set_cell(i + img_idx, col_idx,
                                        image=outputs[1][i])
                col_idx += 1
        latent_codes.append(outputs[0][0:len(batch)])

    # Save results.
    # BUGFIX: replaced `os.system(f'cp ...')` — shell-dependent and unsafe
    # with paths containing spaces/metacharacters — with shutil.copy.
    shutil.copy(args.image_list, f'{output_dir}/image_list.txt')
    np.save(f'{output_dir}/encoded_codes.npy',
            np.concatenate(latent_codes_enc, axis=0))
    np.save(f'{output_dir}/inverted_codes.npy',
            np.concatenate(latent_codes, axis=0))
    visualizer.save(f'{output_dir}/inversion.html')
def optimize_latent_codes_source_target(args, source_latent_code):
    """Restore motion-blurred images by optimizing a StyleGAN latent code.

    The latent variable is initialized from ``source_latent_code`` and
    optimized (gradient descent) so that the *blurred* generated image
    matches the blurred input in perceptual-feature space. For each image
    in ``args.imgs_dir_second``, writes the corrupted input, the restored
    image, and the final latent code to disk.

    Returns the latent code of the LAST processed image.
    NOTE(review): if ``args.imgs_dir_second`` is empty, ``latent_codes`` is
    never assigned and the final return raises NameError — confirm callers
    guarantee a non-empty directory.
    """
    #tf.enable_eager_execution()
    tflib.init_tf()
    with dnnlib.util.open_url(STYLEGAN_MODEL_URL,
                              cache_dir=config.cache_dir) as f:
        _G, _D, Gs = pickle.load(f)

    # Optimization variable: one W+ code (18 layers x 512), initialized
    # from the provided source latent code.
    latent_code_2 = tf.get_variable(
        name='latent_code_2',
        shape=(1, 18, 512),
        dtype='float32',
        initializer=tf.initializers.constant(source_latent_code))

    # Synthesize in NCHW, convert to NHWC in [0, 255].
    generated_img = Gs.components.synthesis.get_output_for(
        latent_code_2, randomize_noise=False)
    generated_img = tf.transpose(generated_img, [0, 2, 3, 1])
    generated_img = ((generated_img + 1) / 2) * 255

    original_img = tf.placeholder(
        tf.float32,
        [None, args.input_img_size[0], args.input_img_size[1], 3])
    blur_kernel = tf.placeholder(
        tf.float32,
        [None, args.blur_parameters[0], args.blur_parameters[0], 3])

    # Blur the real input, then resize to the perceptual model's size.
    degraded_img_resized_for_perceptual = tf.image.resize_images(
        add_motion_blur(original_img, blur_kernel),
        tuple(args.perceptual_img_size),
        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    generated_img_resized_to_original = tf.image.resize_images(
        generated_img,
        tuple(args.input_img_size),
        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    # Apply the SAME blur to the generated image so the loss compares
    # blurred-vs-blurred (the restoration is the unblurred generation).
    generated_img_resized_for_perceptual = tf.image.resize_images(
        add_motion_blur(generated_img_resized_to_original, blur_kernel),
        tuple(args.perceptual_img_size),
        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    generated_img_for_display = tf.saturate_cast(
        generated_img_resized_to_original, tf.uint8)

    # L1 loss between perceptual features of blurred generated vs. blurred
    # original image.
    perceptual_model = PerceptualModel(img_size=args.perceptual_img_size)
    generated_img_features = perceptual_model(
        generated_img_resized_for_perceptual)
    target_img_features = perceptual_model(degraded_img_resized_for_perceptual)
    loss_op = tf.reduce_mean(
        tf.abs(generated_img_features - target_img_features))
    optimizer = tf.train.GradientDescentOptimizer(
        learning_rate=args.learning_rate)
    train_op = optimizer.minimize(loss_op, var_list=[latent_code_2])

    sess = tf.get_default_session()
    img_names = sorted(os.listdir(args.imgs_dir_second))
    for img_name in img_names:
        img = imageio.imread(os.path.join(args.imgs_dir_second, img_name))
        img = cv2.resize(img, dsize=tuple(args.input_img_size))
        blur_kernel_3d = motion_blur_kernel(args.blur_parameters[0],
                                            args.blur_parameters[1])
        corrupted_img = add_motion_blur_single_image(img,
                                                     args.blur_parameters[0],
                                                     args.blur_parameters[1])
        imageio.imwrite(os.path.join(args.corruptions_dir, img_name),
                        corrupted_img)
        #imageio.imwrite(os.path.join(args.masks_dir, img_name), mask * 255)

        # Re-init the latent code and the optimizer state for every image.
        sess.run(
            tf.variables_initializer([latent_code_2] + optimizer.variables()))
        progress_bar_iterator = tqdm(
            iterable=range(args.total_iterations),
            bar_format=
            '{desc}: {percentage:3.0f}% |{bar}| {n_fmt}/{total_fmt}{postfix}',
            desc=img_name)
        for i in progress_bar_iterator:
            loss, _ = sess.run(fetches=[loss_op, train_op],
                               feed_dict={
                                   original_img: img[np.newaxis, ...],
                                   blur_kernel:
                                   blur_kernel_3d[np.newaxis, ...]
                               })
            progress_bar_iterator.set_postfix_str('loss=%.2f' % loss)

        # Fetch the final restoration and latent code for this image.
        reconstructed_imgs, latent_codes = sess.run(
            fetches=[generated_img_for_display, latent_code_2],
            feed_dict={
                original_img: img[np.newaxis, ...],
                blur_kernel: blur_kernel_3d[np.newaxis, ...]
            })
        imageio.imwrite(os.path.join(args.restorations_dir, img_name),
                        reconstructed_imgs[0])
        np.savez(file=os.path.join(args.latents_dir, img_name + '.npz'),
                 latent_code=latent_codes[0])
    return latent_codes[0]
def optimize_latent_codes(args):
    """Project every image in ``args.imgs_dir`` into StyleGAN latent space.

    Builds a TF1 graph where a (1, 18, 512) latent variable (initialized to
    zeros) is optimized so the generated image matches the target image in
    perceptual-feature (L1) distance. For each image, writes the
    reconstruction to ``args.reconstructions_dir`` and the latent code to
    ``args.latents_dir`` as an ``.npz``.
    """
    tflib.init_tf()
    with dnnlib.util.open_url(STYLEGAN_MODEL_URL,
                              cache_dir=config.cache_dir) as f:
        _G, _D, Gs = pickle.load(f)

    # Optimization variable: one W+ code, zero-initialized.
    latent_code = tf.get_variable(
        name='latent_code',
        shape=(1, 18, 512),
        dtype='float32',
        initializer=tf.initializers.zeros()
    )

    # Synthesize in NCHW, convert to NHWC in [0, 255].
    generated_img = Gs.components.synthesis.get_output_for(
        latent_code, randomize_noise=False)
    generated_img = tf.transpose(generated_img, [0, 2, 3, 1])
    generated_img = ((generated_img + 1) / 2) * 255
    generated_img_for_display = tf.saturate_cast(generated_img, tf.uint8)

    target_img = tf.placeholder(
        tf.float32,
        [None, args.input_img_size[0], args.input_img_size[1], 3])
    # Both images are resized to the perceptual model's expected size.
    target_img_resized = tf.image.resize_images(
        target_img,
        tuple(args.perceptual_img_size),
        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR
    )
    generated_img_resized = tf.image.resize_images(
        generated_img,
        tuple(args.perceptual_img_size),
        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR
    )

    # L1 distance in perceptual-feature space.
    perceptual_model = PerceptualModel(img_size=args.perceptual_img_size)
    generated_img_features = perceptual_model(generated_img_resized)
    target_img_features = perceptual_model(target_img_resized)
    loss_op = tf.reduce_mean(
        tf.abs(generated_img_features - target_img_features))

    # Optimizer choice is configurable; anything other than 'adam' falls
    # back to plain gradient descent.
    if args.optimizer == 'adam':
        optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    else:
        optimizer = tf.train.GradientDescentOptimizer(
            learning_rate=args.learning_rate)
    train_op = optimizer.minimize(loss_op, var_list=[latent_code])

    sess = tf.get_default_session()
    img_names = sorted(os.listdir(args.imgs_dir))
    for img_name in img_names:
        img = imageio.imread(os.path.join(args.imgs_dir, img_name))
        # NOTE(review): unlike the source/target variant, the image is NOT
        # resized here — presumably inputs already match input_img_size;
        # confirm against the data-preparation step.

        # Re-init the latent code and optimizer state for every image.
        sess.run(
            tf.variables_initializer([latent_code] + optimizer.variables()))
        progress_bar_iterator = tqdm(
            iterable=range(args.total_iterations),
            bar_format=
            '{desc}: {percentage:3.0f}% |{bar}| {n_fmt}/{total_fmt}{postfix}',
            desc=img_name
        )
        for i in progress_bar_iterator:
            loss, _ = sess.run(
                fetches=[loss_op, train_op],
                feed_dict={
                    target_img: img[np.newaxis, ...]
                }
            )
            progress_bar_iterator.set_postfix_str('loss=%.2f' % loss)

        # Fetch final reconstruction and latent code for this image.
        reconstructed_imgs, latent_codes = sess.run(
            fetches=[generated_img_for_display, latent_code],
            feed_dict={
                target_img: img[np.newaxis, ...]
            }
        )
        imageio.imwrite(os.path.join(args.reconstructions_dir, img_name),
                        reconstructed_imgs[0])
        np.savez(file=os.path.join(args.latents_dir, img_name + '.npz'),
                 latent_code=latent_codes[0])
def training_loop(
        submit_config,
        Encoder_args = {},
        E_opt_args = {},
        D_opt_args = {},
        E_loss_args = EasyDict(),
        D_loss_args = {},
        lr_args = EasyDict(),
        tf_config = {},
        dataset_args = EasyDict(),
        decoder_pkl = EasyDict(),
        drange_data = [0, 255],
        drange_net = [-1,1],           # Dynamic range used when feeding image data to the networks.
        mirror_augment = False,
        resume_run_id = config.ENCODER_PICKLE_DIR,  # Run ID or network pkl to resume training from, None = start from scratch.
        resume_snapshot = None,        # Snapshot index to resume training from, None = autodetect.
        image_snapshot_ticks = 1,      # How often to export image snapshots?
        network_snapshot_ticks = 4,    # How often to export network snapshots?
        max_iters = 150000):
    """Train an encoder E against a fixed StyleGAN generator Gs, with an
    adversarial discriminator D, on multiple GPUs.

    Variant that mirrors logs/images/snapshots to ``config.GDRIVE_PATH``
    and resumes from ``config.ENCODER_PICKLE_DIR`` by default.

    NOTE(review): mutable defaults ({}/EasyDict()/lists) are shared across
    calls; safe only if callers never mutate them — confirm.
    """
    tflib.init_tf(tf_config)

    # Input placeholders (NCHW); training batch is split across GPUs.
    with tf.name_scope('input'):
        real_train = tf.placeholder(tf.float32, [submit_config.batch_size, 3, submit_config.image_size, submit_config.image_size], name='real_image_train')
        real_test = tf.placeholder(tf.float32, [submit_config.batch_size_test, 3, submit_config.image_size, submit_config.image_size], name='real_image_test')
        real_split = tf.split(real_train, num_or_size_splits=submit_config.num_gpus, axis=0)

    with tf.device('/gpu:0'):
        if resume_run_id is not None:
            # Resume: recover the starting iteration from the snapshot
            # filename's nimg counter (network-snapshot-<nimg>.pkl).
            network_pkl = misc.locate_network_pkl(resume_run_id, resume_snapshot)
            print('Loading networks from "%s"...' % network_pkl)
            E, G, D, Gs = misc.load_pkl(network_pkl)
            start = int(network_pkl.split('-')[-1].split('.')[0]) // submit_config.batch_size
            print('Start: ', start)
        else:
            # Fresh start: load pre-trained decoder, build a new encoder.
            print('Constructing networks...')
            G, D, Gs = misc.load_pkl(decoder_pkl.decoder_pkl)
            num_layers = Gs.components.synthesis.input_shape[1]
            E = tflib.Network('E', size=submit_config.image_size, filter=64, filter_max=1024, num_layers=num_layers, phase=True, **Encoder_args)
            start = 0
    E.print_layers(); Gs.print_layers(); D.print_layers()

    # Exponentially-decayed learning rate, stepped once per E/D update via
    # the control dependency on add_global0 below.
    global_step0 = tf.Variable(start, trainable=False, name='learning_rate_step')
    learning_rate = tf.train.exponential_decay(lr_args.learning_rate, global_step0, lr_args.decay_step, lr_args.decay_rate, staircase=lr_args.stair)
    add_global0 = global_step0.assign_add(1)

    E_opt = tflib.Optimizer(name='TrainE', learning_rate=learning_rate, **E_opt_args)
    D_opt = tflib.Optimizer(name='TrainD', learning_rate=learning_rate, **D_opt_args)

    # Scalar loss accumulators, summed per GPU then averaged below.
    E_loss_rec = 0.
    E_loss_adv = 0.
    D_loss_real = 0.
    D_loss_fake = 0.
    D_loss_grad = 0.

    for gpu in range(submit_config.num_gpus):
        print('build graph on gpu %s' % str(gpu))
        with tf.name_scope('GPU%d' % gpu), tf.device('/gpu:%d' % gpu):
            # GPU 0 uses the master networks; others use clones ('shadows').
            E_gpu = E if gpu == 0 else E.clone(E.name + '_shadow')
            D_gpu = D if gpu == 0 else D.clone(D.name + '_shadow')
            G_gpu = Gs if gpu == 0 else Gs.clone(Gs.name + '_shadow')
            perceptual_model = PerceptualModel(img_size=[E_loss_args.perceptual_img_size, E_loss_args.perceptual_img_size], multi_layers=False)
            real_gpu = process_reals(real_split[gpu], mirror_augment, drange_data, drange_net)
            # Loss functions are resolved by name from E_loss_args /
            # D_loss_args (dnnlib convention).
            with tf.name_scope('E_loss'), tf.control_dependencies(None):
                E_loss, recon_loss, adv_loss = dnnlib.util.call_func_by_name(E=E_gpu, G=G_gpu, D=D_gpu, perceptual_model=perceptual_model, reals=real_gpu, **E_loss_args)
                E_loss_rec += recon_loss
                E_loss_adv += adv_loss
            with tf.name_scope('D_loss'), tf.control_dependencies(None):
                D_loss, loss_fake, loss_real, loss_gp = dnnlib.util.call_func_by_name(E=E_gpu, G=G_gpu, D=D_gpu, reals=real_gpu, **D_loss_args)
                D_loss_real += loss_real
                D_loss_fake += loss_fake
                D_loss_grad += loss_gp
            # Gradient registration depends on add_global0 so the LR step
            # advances with every update.
            with tf.control_dependencies([add_global0]):
                E_opt.register_gradients(E_loss, E_gpu.trainables)
                D_opt.register_gradients(D_loss, D_gpu.trainables)

    # Average accumulated per-GPU losses for reporting.
    E_loss_rec /= submit_config.num_gpus
    E_loss_adv /= submit_config.num_gpus
    D_loss_real /= submit_config.num_gpus
    D_loss_fake /= submit_config.num_gpus
    D_loss_grad /= submit_config.num_gpus

    E_train_op = E_opt.apply_updates()
    D_train_op = D_opt.apply_updates()

    print('building testing graph...')
    fake_X_val = test(E, Gs, real_test, submit_config)

    sess = tf.get_default_session()

    print('Getting training data...')
    image_batch_train = get_train_data(sess, data_dir=dataset_args.data_train, submit_config=submit_config, mode='train')
    image_batch_test = get_train_data(sess, data_dir=dataset_args.data_test, submit_config=submit_config, mode='test')

    summary_log = tf.summary.FileWriter(config.GDRIVE_PATH)

    cur_nimg = start * submit_config.batch_size
    cur_tick = 0
    tick_start_nimg = cur_nimg
    start_time = time.time()

    # NOTE(review): tf.initialize_variables is deprecated in TF1 in favor
    # of tf.variables_initializer — consider updating.
    init_pascal = tf.initialize_variables(
        [global_step0],
        name='init_pascal'
    )
    sess.run(init_pascal)

    print('Optimization starts!!!')
    for it in range(start, max_iters):
        batch_images = sess.run(image_batch_train)
        feed_dict_1 = {real_train: batch_images}
        # Alternate E and D updates on the same batch.
        _, recon_, adv_ = sess.run([E_train_op, E_loss_rec, E_loss_adv], feed_dict_1)
        _, d_r_, d_f_, d_g_ = sess.run([D_train_op, D_loss_real, D_loss_fake, D_loss_grad], feed_dict_1)
        cur_nimg += submit_config.batch_size

        if it % 50 == 0:
            print('Iter: %06d recon_loss: %-6.4f adv_loss: %-6.4f d_r_loss: %-6.4f d_f_loss: %-6.4f d_reg: %-6.4f time:%-12s' % (
                it, recon_, adv_, d_r_, d_f_, d_g_, dnnlib.util.format_time(time.time() - start_time)))
            sys.stdout.flush()
            tflib.autosummary.save_summaries(summary_log, it)

        if it % 500 == 0:
            # Periodic qualitative check on the test batch.
            batch_images_test = sess.run(image_batch_test)
            batch_images_test = misc.adjust_dynamic_range(batch_images_test.astype(np.float32), [0, 255], [-1., 1.])
            samples2 = sess.run(fake_X_val, feed_dict={real_test: batch_images_test})
            orin_recon = np.concatenate([batch_images_test, samples2], axis=0)
            orin_recon = adjust_pixel_range(orin_recon)
            orin_recon = fuse_images(orin_recon, row=2, col=submit_config.batch_size_test)
            # save image results during training, first row is original images and the second row is reconstructed images
            save_image('%s/iter_%08d.png' % (submit_config.run_dir, cur_nimg), orin_recon)
            # save image to gdrive
            img_path = os.path.join(config.GDRIVE_PATH, 'images', ('iter_%08d.png' % (cur_nimg)))
            save_image(img_path, orin_recon)

        # A "tick" elapses every 65000 images; snapshots are exported every
        # network_snapshot_ticks ticks.
        if cur_nimg >= tick_start_nimg + 65000:
            cur_tick += 1
            tick_start_nimg = cur_nimg
            if cur_tick % network_snapshot_ticks == 0:
                pkl = os.path.join(submit_config.run_dir, 'network-snapshot-%08d.pkl' % (cur_nimg))
                misc.save_pkl((E, G, D, Gs), pkl)
                # save network snapshot to gdrive
                pkl_drive = os.path.join(config.GDRIVE_PATH, 'snapshots', 'network-snapshot-%08d.pkl' % (cur_nimg))
                misc.save_pkl((E, G, D, Gs), pkl_drive)

    # Final checkpoint after the training loop completes.
    misc.save_pkl((E, G, D, Gs), os.path.join(submit_config.run_dir, 'network-final.pkl'))
    summary_log.close()
def training_loop(
        submit_config,
        Encoder_args={},
        E_opt_args={},
        D_opt_args={},
        E_loss_args={},
        D_loss_args={},
        lr_args=EasyDict(),
        tf_config={},
        dataset_args=EasyDict(),
        decoder_pkl=EasyDict(),
        drange_data=[0, 255],
        drange_net=[
            -1, 1
        ],  # Dynamic range used when feeding image data to the networks.
        mirror_augment=False,
        resume_run_id=None,  # Run ID or network pkl to resume training from, None = start from scratch.
        resume_snapshot=None,  # Snapshot index to resume training from, None = autodetect.
        image_snapshot_ticks=1,  # How often to export image snapshots?
        network_snapshot_ticks=10,  # How often to export network snapshots?
        save_tf_graph=False,  # Include full TensorFlow computation graph in the tfevents file?
        save_weight_histograms=False,  # Include weight histograms in the tfevents file?
        max_iters=150000,
        E_smoothing=0.999):
    """Train an encoder E against a fixed StyleGAN generator Gs with
    discriminator D on multiple GPUs.

    Variant whose pickles carry five networks (E, G, D, Gs, NE) and that
    logs/snapshots only into ``submit_config.run_dir``.

    NOTE(review): E_smoothing is currently unused (the moving-average op
    below is commented out). Mutable defaults ({}/EasyDict()) are shared
    across calls; safe only if never mutated — confirm.
    """
    tflib.init_tf(tf_config)

    # Input placeholders (NCHW); the training batch is split across GPUs.
    with tf.name_scope('input'):
        real_train = tf.placeholder(tf.float32, [
            submit_config.batch_size, 3, submit_config.image_size,
            submit_config.image_size
        ], name='real_image_train')
        real_test = tf.placeholder(tf.float32, [
            submit_config.batch_size_test, 3, submit_config.image_size,
            submit_config.image_size
        ], name='real_image_test')
        real_split = tf.split(real_train,
                              num_or_size_splits=submit_config.num_gpus,
                              axis=0)

    with tf.device('/gpu:0'):
        if resume_run_id is not None:
            # Resume: starting iteration is recovered from the snapshot
            # filename's nimg counter.
            network_pkl = misc.locate_network_pkl(resume_run_id, resume_snapshot)
            print('Loading networks from "%s"...' % network_pkl)
            E, G, D, Gs, NE = misc.load_pkl(network_pkl)
            start = int(network_pkl.split('-')[-1].split('.')
                        [0]) // submit_config.batch_size
        else:
            # Fresh start: pre-trained decoder pickle, new encoder network.
            print('Constructing networks...')
            G, D, Gs, NE = misc.load_pkl(decoder_pkl.decoder_pkl)
            E = tflib.Network('E',
                              size=submit_config.image_size,
                              filter=64,
                              filter_max=1024,
                              phase=True,
                              **Encoder_args)
            start = 0
    Gs.print_layers()
    E.print_layers()
    D.print_layers()

    # Exponentially-decayed learning rate; global_step advances via the
    # control dependency on add_global at gradient registration.
    global_step = tf.Variable(start, trainable=False, name='learning_rate_step')
    learning_rate = tf.train.exponential_decay(lr_args.learning_rate,
                                               global_step,
                                               lr_args.decay_step,
                                               lr_args.decay_rate,
                                               staircase=lr_args.stair)
    add_global = global_step.assign_add(1)

    E_opt = tflib.Optimizer(name='TrainE', learning_rate=learning_rate, **E_opt_args)
    D_opt = tflib.Optimizer(name='TrainD', learning_rate=learning_rate, **D_opt_args)

    # Scalar loss accumulators, summed per GPU then averaged below.
    E_loss_rec = 0.
    E_loss_adv = 0.
    D_loss_real = 0.
    D_loss_fake = 0.
    D_loss_grad = 0.

    for gpu in range(submit_config.num_gpus):
        print('build graph on gpu %s' % str(gpu))
        with tf.name_scope('GPU%d' % gpu), tf.device('/gpu:%d' % gpu):
            # GPU 0 uses the master networks; others use clones ('shadows').
            E_gpu = E if gpu == 0 else E.clone(E.name + '_shadow')
            D_gpu = D if gpu == 0 else D.clone(D.name + '_shadow')
            G_gpu = Gs if gpu == 0 else Gs.clone(Gs.name + '_shadow')
            perceptual_model = PerceptualModel(
                img_size=[submit_config.image_size, submit_config.image_size],
                multi_layers=False)
            real_gpu = process_reals(real_split[gpu], mirror_augment,
                                     drange_data, drange_net)
            # Loss functions resolved by name (dnnlib convention).
            with tf.name_scope('E_loss'), tf.control_dependencies(None):
                E_loss, recon_loss, adv_loss = dnnlib.util.call_func_by_name(
                    E=E_gpu,
                    G=G_gpu,
                    D=D_gpu,
                    perceptual_model=perceptual_model,
                    reals=real_gpu,
                    **E_loss_args)
                E_loss_rec += recon_loss
                E_loss_adv += adv_loss
            with tf.name_scope('D_loss'), tf.control_dependencies(None):
                D_loss, loss_fake, loss_real, loss_gp = dnnlib.util.call_func_by_name(
                    E=E_gpu, G=G_gpu, D=D_gpu, reals=real_gpu, **D_loss_args)
                D_loss_real += loss_real
                D_loss_fake += loss_fake
                D_loss_grad += loss_gp
            # LR step advances with every registered update.
            with tf.control_dependencies([add_global]):
                E_opt.register_gradients(E_loss, E_gpu.trainables)
                D_opt.register_gradients(D_loss, D_gpu.trainables)

    # Average accumulated per-GPU losses for reporting.
    E_loss_rec /= submit_config.num_gpus
    E_loss_adv /= submit_config.num_gpus
    D_loss_real /= submit_config.num_gpus
    D_loss_fake /= submit_config.num_gpus
    D_loss_grad /= submit_config.num_gpus

    E_train_op = E_opt.apply_updates()
    D_train_op = D_opt.apply_updates()
    #Es_update_op = Es.setup_as_moving_average_of(E, beta=E_smoothing)

    print('building testing graph...')
    fake_X_val = test(E, Gs, real_test, submit_config)

    sess = tf.get_default_session()

    print('Getting training data...')
    image_batch_train = get_train_data(sess,
                                       data_dir=dataset_args.data_train,
                                       submit_config=submit_config,
                                       mode='train')
    image_batch_test = get_train_data(sess,
                                      data_dir=dataset_args.data_test,
                                      submit_config=submit_config,
                                      mode='test')

    summary_log = tf.summary.FileWriter(submit_config.run_dir)
    if save_tf_graph:
        summary_log.add_graph(tf.get_default_graph())
    if save_weight_histograms:
        E.setup_weight_histograms()
        D.setup_weight_histograms()

    cur_nimg = start * submit_config.batch_size
    cur_tick = 0
    tick_start_nimg = cur_nimg
    start_time = time.time()

    print('Optimization starts!!!')
    for it in range(start, max_iters):
        feed_dict = {real_train: sess.run(image_batch_train)}
        # Alternate E and D updates on the same batch.
        sess.run([E_train_op, E_loss_rec, E_loss_adv], feed_dict)
        sess.run([D_train_op, D_loss_real, D_loss_fake, D_loss_grad], feed_dict)
        cur_nimg += submit_config.batch_size

        if it % 100 == 0:
            print("Iter: %06d kimg: %-8.1f time: %-12s" %
                  (it, cur_nimg / 1000,
                   dnnlib.util.format_time(time.time() - start_time)))
            sys.stdout.flush()
            tflib.autosummary.save_summaries(summary_log, it)

        # A "tick" elapses every 65000 images; image/network snapshots are
        # exported every image_snapshot_ticks / network_snapshot_ticks ticks.
        if cur_nimg >= tick_start_nimg + 65000:
            cur_tick += 1
            tick_start_nimg = cur_nimg
            if cur_tick % image_snapshot_ticks == 0:
                batch_images_test = sess.run(image_batch_test)
                batch_images_test = misc.adjust_dynamic_range(
                    batch_images_test.astype(np.float32), [0, 255], [-1., 1.])
                samples2 = sess.run(fake_X_val,
                                    feed_dict={real_test: batch_images_test})
                # Convert NCHW -> NHWC for image writing.
                samples2 = samples2.transpose(0, 2, 3, 1)
                batch_images_test = batch_images_test.transpose(0, 2, 3, 1)
                orin_recon = np.concatenate([batch_images_test, samples2],
                                            axis=0)
                imwrite(immerge(orin_recon, 2, submit_config.batch_size_test),
                        '%s/iter_%08d.png' % (submit_config.run_dir, cur_nimg))
            if cur_tick % network_snapshot_ticks == 0:
                pkl = os.path.join(
                    submit_config.run_dir,
                    'network-snapshot-%08d.pkl' % (cur_nimg))
                misc.save_pkl((E, G, D, Gs, NE), pkl)

    # Final checkpoint after the training loop completes.
    misc.save_pkl((E, G, D, Gs, NE),
                  os.path.join(submit_config.run_dir, 'network-final.pkl'))
    summary_log.close()
def main():
    """Diffuse target-image crops into context images via GAN inversion.

    For every (target, context) pair: paste the target's center crop into
    the context image, encode the stitched image with E to initialize a
    latent code, then optimize that code against a MASKED pixel +
    perceptual loss so only the cropped region is matched. Saves encoded
    and inverted latent codes as ``.npy`` and an HTML visualization page.

    Reads all settings from ``parse_args()``; writes into ``output_dir``.
    """
    args = parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
    assert os.path.exists(args.target_list)
    target_list_name = os.path.splitext(os.path.basename(args.target_list))[0]
    assert os.path.exists(args.context_list)
    context_list_name = os.path.splitext(os.path.basename(
        args.context_list))[0]
    output_dir = args.output_dir or f'results/diffusion'
    job_name = f'{target_list_name}_TO_{context_list_name}'
    logger = setup_logger(output_dir, f'{job_name}.log', f'{job_name}_logger')

    logger.info('Loading model.')
    tflib.init_tf({'rnd.np_random_seed': 1000})
    with open(args.model_path, 'rb') as f:
        # Pickle layout: (Encoder, _, _, Gs) — only E and Gs are used here.
        E, _, _, Gs = pickle.load(f)

    # Get input size (encoder input is NCHW, so H is index 2, W index 3).
    image_size = E.input_shape[2]
    assert image_size == E.input_shape[3]

    # Binary mask selecting the crop region centered at (center_x, center_y);
    # the loss is restricted to this region.
    crop_size = args.crop_size
    crop_x = args.center_x - crop_size // 2
    crop_y = args.center_y - crop_size // 2
    mask = np.zeros((1, image_size, image_size, 3), dtype=np.float32)
    mask[:, crop_y:crop_y + crop_size, crop_x:crop_x + crop_size, :] = 1.0

    # Build graph.
    logger.info('Building graph.')
    sess = tf.get_default_session()
    input_shape = E.input_shape
    input_shape[0] = args.batch_size
    x = tf.placeholder(tf.float32, shape=input_shape, name='real_image')
    # NCHW [-1,1] -> NHWC, masked; outside the crop everything maps to -1.
    x_mask = (tf.transpose(x, [0, 2, 3, 1]) + 1) * mask - 1
    x_mask_255 = (x_mask + 1) / 2 * 255
    latent_shape = Gs.components.synthesis.input_shape
    latent_shape[0] = args.batch_size
    # wp is the optimization variable: one W+ latent code per batch entry.
    wp = tf.get_variable(shape=latent_shape, name='latent_code')
    x_rec = Gs.components.synthesis.get_output_for(wp, randomize_noise=False)
    x_rec_mask = (tf.transpose(x_rec, [0, 2, 3, 1]) + 1) * mask - 1
    x_rec_mask_255 = (x_rec_mask + 1) / 2 * 255
    # Encoder output initializes wp (no random-init option in this script).
    w_enc = E.get_output_for(x, phase=False)
    wp_enc = tf.reshape(w_enc, latent_shape)
    setter = tf.assign(wp, wp_enc)

    # Settings for optimization: masked pixel loss + masked perceptual loss.
    logger.info('Setting configuration for optimization.')
    perceptual_model = PerceptualModel([image_size, image_size], False)
    x_feat = perceptual_model(x_mask_255)
    x_rec_feat = perceptual_model(x_rec_mask_255)
    loss_feat = tf.reduce_mean(tf.square(x_feat - x_rec_feat), axis=[1])
    loss_pix = tf.reduce_mean(tf.square(x_mask - x_rec_mask), axis=[1, 2, 3])
    loss = loss_pix + args.loss_weight_feat * loss_feat
    optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    train_op = optimizer.minimize(loss, var_list=[wp])
    tflib.init_uninitialized_vars()

    # Load image lists (one path per line).
    logger.info('Loading target images and context images.')
    target_list = []
    with open(args.target_list, 'r') as f:
        for line in f:
            target_list.append(line.strip())
    num_targets = len(target_list)
    context_list = []
    with open(args.context_list, 'r') as f:
        for line in f:
            context_list.append(line.strip())
    num_contexts = len(context_list)
    num_pairs = num_targets * num_contexts

    # Invert images.
    logger.info('Start diffusion.')
    # BUGFIX: clamp to >= 1 so that num_results > num_iterations no longer
    # raises ZeroDivisionError at the `step % save_interval` checks below.
    save_interval = max(args.num_iterations // args.num_results, 1)
    headers = [
        'Target Image', 'Context Image', 'Stitched Image', 'Encoder Output'
    ]
    for step in range(1, args.num_iterations + 1):
        if step == args.num_iterations or step % save_interval == 0:
            headers.append(f'Step {step:06d}')
    viz_size = None if args.viz_size == 0 else args.viz_size
    visualizer = HtmlPageVisualizer(num_rows=num_pairs,
                                    num_cols=len(headers),
                                    viz_size=viz_size)
    visualizer.set_headers(headers)

    images = np.zeros(input_shape, np.uint8)
    latent_codes_enc = []
    latent_codes = []
    for target_idx in tqdm(range(num_targets), desc='Target ID', leave=False):
        # Load target once; it is stitched into every context batch below.
        target_image = resize_image(load_image(target_list[target_idx]),
                                    (image_size, image_size))
        visualizer.set_cell(target_idx * num_contexts, 0, image=target_image)
        for context_idx in tqdm(range(0, num_contexts, args.batch_size),
                                desc='Context ID', leave=False):
            row_idx = target_idx * num_contexts + context_idx
            batch = context_list[context_idx:context_idx + args.batch_size]
            for i, context_image_path in enumerate(batch):
                context_image = resize_image(load_image(context_image_path),
                                             (image_size, image_size))
                visualizer.set_cell(row_idx + i, 1, image=context_image)
                # Stitch: overwrite the context's crop with the target's.
                context_image[crop_y:crop_y + crop_size,
                              crop_x:crop_x + crop_size] = (
                                  target_image[crop_y:crop_y + crop_size,
                                               crop_x:crop_x + crop_size])
                visualizer.set_cell(row_idx + i, 2, image=context_image)
                images[i] = np.transpose(context_image, [2, 0, 1])
            inputs = images.astype(np.float32) / 255 * 2.0 - 1.0
            # Run encoder to initialize wp, then snapshot the encoder result.
            sess.run([setter], {x: inputs})
            outputs = sess.run([wp, x_rec])
            latent_codes_enc.append(outputs[0][0:len(batch)])
            outputs[1] = adjust_pixel_range(outputs[1])
            for i, _ in enumerate(batch):
                visualizer.set_cell(row_idx + i, 3, image=outputs[1][i])
            # Optimize latent codes, saving intermediate reconstructions.
            col_idx = 4
            for step in tqdm(range(1, args.num_iterations + 1), leave=False):
                sess.run(train_op, {x: inputs})
                if step == args.num_iterations or step % save_interval == 0:
                    outputs = sess.run([wp, x_rec])
                    outputs[1] = adjust_pixel_range(outputs[1])
                    for i, _ in enumerate(batch):
                        visualizer.set_cell(row_idx + i, col_idx,
                                            image=outputs[1][i])
                    col_idx += 1
            latent_codes.append(outputs[0][0:len(batch)])

    # Save results as (num_targets, num_contexts, *latent_dims) arrays.
    code_shape = [num_targets, num_contexts] + list(latent_shape[1:])
    np.save(f'{output_dir}/{job_name}_encoded_codes.npy',
            np.concatenate(latent_codes_enc, axis=0).reshape(code_shape))
    np.save(f'{output_dir}/{job_name}_inverted_codes.npy',
            np.concatenate(latent_codes, axis=0).reshape(code_shape))
    visualizer.save(f'{output_dir}/{job_name}.html')
def training_loop(
        submit_config,
        Encoder_args            = {},
        E_opt_args              = {},
        D_opt_args              = {},
        E_loss_args             = EasyDict(),
        D_loss_args             = {},
        lr_args                 = EasyDict(),
        tf_config               = {},
        dataset_args            = EasyDict(),
        decoder_pkl             = EasyDict(),
        inversion_pkl           = EasyDict(),
        drange_data             = [0, 255],
        drange_net              = [-1,1],                       # Dynamic range used when feeding image data to the networks.
        mirror_augment          = False,
        resume_run_id           = config.ENCODER_PICKLE_DIR,    # Run ID or network pkl to resume training from, None = start from scratch.
        resume_snapshot         = None,                         # Snapshot index to resume training from, None = autodetect.
        image_snapshot_ticks    = 1,                            # How often to export image snapshots?
        network_snapshot_ticks  = 4,                            # How often to export network snapshots?
        max_iters               = 150000):
    """Train a landmark-conditioned portrait encoder E against discriminator D.

    Builds a multi-GPU TF1 graph over portrait/landmark placeholder pairs
    (a primary and a "shuffled" secondary stream), optimizes E and D with
    separate Adam-style tflib Optimizers under an exponentially decaying
    learning rate, periodically dumps debug image grids and network
    snapshots (both to `submit_config.run_dir` and to a Google-Drive path
    from `config.getGdrivePath()`), and writes a final `network-final.pkl`.

    NOTE(review): mutable default arguments ({} / EasyDict()) are shared
    across calls — kept as-is here because this mirrors the surrounding
    codebase's convention and the dicts are only read.
    """
    tflib.init_tf(tf_config)

    # Placeholders: NCHW uint8-range images for train (batch_size) and
    # test (batch_size_test); "shuffled" streams supply mismatched
    # portrait/landmark pairs for the pose-transfer objective.
    with tf.name_scope('input'):
        placeholder_real_portraits_train = tf.placeholder(tf.float32, [submit_config.batch_size, 3, submit_config.image_size, submit_config.image_size], name='placeholder_real_portraits_train')
        placeholder_real_landmarks_train = tf.placeholder(tf.float32, [submit_config.batch_size, 3, submit_config.image_size, submit_config.image_size], name='placeholder_real_landmarks_train')
        placeholder_real_shuffled_train = tf.placeholder(tf.float32, [submit_config.batch_size, 3, submit_config.image_size, submit_config.image_size], name='placeholder_real_shuffled_train')
        placeholder_landmarks_shuffled_train = tf.placeholder(tf.float32, [submit_config.batch_size, 3, submit_config.image_size, submit_config.image_size], name='placeholder_landmarks_shuffled_train')

        placeholder_real_portraits_test = tf.placeholder(tf.float32, [submit_config.batch_size_test, 3, submit_config.image_size, submit_config.image_size], name='placeholder_real_portraits_test')
        placeholder_real_landmarks_test = tf.placeholder(tf.float32, [submit_config.batch_size_test, 3, submit_config.image_size, submit_config.image_size], name='placeholder_real_landmarks_test')
        placeholder_real_shuffled_test = tf.placeholder(tf.float32, [submit_config.batch_size_test, 3, submit_config.image_size, submit_config.image_size], name='placeholder_real_shuffled_test')
        placeholder_real_landmarks_shuffled_test = tf.placeholder(tf.float32, [submit_config.batch_size_test, 3, submit_config.image_size, submit_config.image_size], name='placeholder_real_landmarks_shuffled_test')

        # Split each training stream along the batch axis, one shard per GPU.
        real_split_landmarks = tf.split(placeholder_real_landmarks_train, num_or_size_splits=submit_config.num_gpus, axis=0)
        real_split_portraits = tf.split(placeholder_real_portraits_train, num_or_size_splits=submit_config.num_gpus, axis=0)
        real_split_shuffled = tf.split(placeholder_real_shuffled_train, num_or_size_splits=submit_config.num_gpus, axis=0)
        real_split_lm_shuffled = tf.split(placeholder_landmarks_shuffled_train, num_or_size_splits=submit_config.num_gpus, axis=0)

        # String flag routed into the loss functions; the loop below always
        # feeds "pose".
        placeholder_training_flag = tf.placeholder(tf.string, name='placeholder_training_flag')

    with tf.device('/gpu:0'):
        if resume_run_id is not None:
            # Resume: restore E/G/D/Gs from a snapshot pickle and derive the
            # starting iteration from the image count encoded in its filename.
            network_pkl = misc.locate_network_pkl(resume_run_id, resume_snapshot)
            print('Loading networks from "%s"...' % network_pkl)
            E, G, D, Gs = misc.load_pkl(network_pkl)
            start = int(network_pkl.split('-')[-1].split('.')[0]) // submit_config.batch_size
            print('Start: ', start)
        else:
            # Fresh start: load the pre-trained decoder, but deliberately do
            # NOT reuse its discriminator — a new D is constructed instead.
            print('Constructing networks...')
            G, _, Gs = misc.load_pkl(decoder_pkl.decoder_pkl)
            num_layers = Gs.components.synthesis.input_shape[1]
            # Brand-new discriminator; only construction here — tflib.Network
            # inspects the build function and wires inputs via get_output_for.
            D = tflib.Network('D',
                              num_channels=3, resolution=128, label_size=0,
                              func_name="training.networks_stylegan.D_basic")
            print("Created new Discriminator!")
            E = tflib.Network('E', size=submit_config.image_size, filter=64, filter_max=1024, num_layers=num_layers, phase=True, **Encoder_args)
            start = 0
        # Frozen inversion network used inside the losses / test graphs.
        Inv, _, _, _ = misc.load_pkl(inversion_pkl.inversion_pkl)
        E.print_layers(); Gs.print_layers(); D.print_layers()

    # Shared decayed learning rate; add_global0 advances the decay step and
    # is attached as a control dependency of gradient registration below.
    global_step0 = tf.Variable(start, trainable=False, name='learning_rate_step')
    learning_rate = tf.train.exponential_decay(lr_args.learning_rate, global_step0, lr_args.decay_step, lr_args.decay_rate, staircase=lr_args.stair)
    add_global0 = global_step0.assign_add(1)

    E_opt = tflib.Optimizer(name='TrainE', learning_rate=learning_rate, **E_opt_args)
    D_opt = tflib.Optimizer(name='TrainD', learning_rate=learning_rate, **D_opt_args)

    # Loss accumulators, summed per GPU and averaged afterwards.
    E_loss_rec = 0.
    E_loss_adv = 0.
    D_loss_real = 0.
    D_loss_fake = 0.
    D_loss_grad = 0.

    for gpu in range(submit_config.num_gpus):
        print('build graph on gpu %s' % str(gpu))
        with tf.name_scope('GPU%d' % gpu), tf.device('/gpu:%d' % gpu):
            # GPU 0 uses the master networks; others get "_shadow" clones.
            E_gpu = E if gpu == 0 else E.clone(E.name + '_shadow')
            D_gpu = D if gpu == 0 else D.clone(D.name + '_shadow')
            G_gpu = Gs if gpu == 0 else Gs.clone(Gs.name + '_shadow')
            Inv_gpu = Inv if gpu == 0 else Inv.clone(Inv.name + '_shadow')
            perceptual_model = PerceptualModel(img_size=[E_loss_args.perceptual_img_size, E_loss_args.perceptual_img_size], multi_layers=False)
            # process_reals maps [0,255] data into the network range [-1,1]
            # (and applies mirror augmentation if enabled).
            real_portraits_gpu = process_reals(real_split_portraits[gpu], mirror_augment, drange_data, drange_net)
            shuffled_portraits_gpu = process_reals(real_split_shuffled[gpu], mirror_augment, drange_data, drange_net)
            real_landmarks_gpu = process_reals(real_split_landmarks[gpu], mirror_augment, drange_data, drange_net)
            shuffled_landmarks_gpu = process_reals(real_split_lm_shuffled[gpu], mirror_augment, drange_data, drange_net)
            # Loss functions are resolved by name from E_loss_args/D_loss_args
            # (dnnlib convention: func_name is carried inside the kwargs).
            with tf.name_scope('E_loss'), tf.control_dependencies(None):
                E_loss, recon_loss, adv_loss = dnnlib.util.call_func_by_name(E=E_gpu, G=G_gpu, D=D_gpu, Inv=Inv_gpu, perceptual_model=perceptual_model, real_portraits=real_portraits_gpu, shuffled_portraits=shuffled_portraits_gpu, real_landmarks=real_landmarks_gpu, shuffled_landmarks=shuffled_landmarks_gpu, training_flag=placeholder_training_flag, **E_loss_args)
                E_loss_rec += recon_loss
                E_loss_adv += adv_loss
            with tf.name_scope('D_loss'), tf.control_dependencies(None):
                D_loss, loss_fake, loss_real, loss_gp = dnnlib.util.call_func_by_name(E=E_gpu, G=G_gpu, D=D_gpu, Inv=Inv_gpu, real_portraits=real_portraits_gpu, shuffled_portraits=shuffled_portraits_gpu, real_landmarks=real_landmarks_gpu, training_flag=placeholder_training_flag, **D_loss_args)
                D_loss_real += loss_real
                D_loss_fake += loss_fake
                D_loss_grad += loss_gp
            # Registering gradients under this dependency bumps the decay
            # step once per training step.
            with tf.control_dependencies([add_global0]):
                E_opt.register_gradients(E_loss, E_gpu.trainables)
                D_opt.register_gradients(D_loss, D_gpu.trainables)

    # Average the per-GPU loss sums for reporting.
    E_loss_rec /= submit_config.num_gpus
    E_loss_adv /= submit_config.num_gpus
    D_loss_real /= submit_config.num_gpus
    D_loss_fake /= submit_config.num_gpus
    D_loss_grad /= submit_config.num_gpus

    E_train_op = E_opt.apply_updates()
    D_train_op = D_opt.apply_updates()

    print('building testing graph...')
    # fake_X_val: portrait manipulated to the fed landmarks; inv_X_val: plain
    # inversion output — both driven by the *_test placeholders.
    fake_X_val = test(E, Gs, Inv, placeholder_real_portraits_test, placeholder_real_landmarks_test, placeholder_real_shuffled_test, submit_config)
    inv_X_val = test_inversion(E, Gs, Inv, placeholder_real_portraits_test, placeholder_real_landmarks_test, placeholder_real_shuffled_test, submit_config)
    #sampled_portraits_val = sample_random_portraits(Gs, submit_config.batch_size)
    #sampled_portraits_val_test = sample_random_portraits(Gs, submit_config.batch_size_test)

    sess = tf.get_default_session()

    print('Getting training data...')
    # Each fetched batch stacks (portrait, landmark) along axis 1; the
    # "secondary" iterators provide the shuffled (mismatched) pairs.
    stack_batch_train = get_train_data(sess, data_dir=dataset_args.data_train, submit_config=submit_config, mode='train')
    stack_batch_test = get_train_data(sess, data_dir=dataset_args.data_test, submit_config=submit_config, mode='test')
    stack_batch_train_secondary = get_train_data(sess, data_dir=dataset_args.data_train, submit_config=submit_config, mode='train_secondary')
    stack_batch_test_secondary = get_train_data(sess, data_dir=dataset_args.data_test, submit_config=submit_config, mode='test_secondary')

    summary_log = tf.summary.FileWriter(config.getGdrivePath())

    cur_nimg = start * submit_config.batch_size
    cur_tick = 0
    tick_start_nimg = cur_nimg
    start_time = time.time()

    # NOTE(review): tf.initialize_variables is deprecated TF1 API; kept as-is
    # to match the rest of the codebase. Only the decay-step variable is
    # (re)initialized here so a resumed run keeps its restored weights.
    init_fix = tf.initialize_variables(
        [global_step0],
        name='init_fix'
    )
    sess.run(init_fix)
    print('Optimization starts!!!')

    for it in range(start, max_iters):
        # Primary batch: matched portrait/landmark pairs.
        batch_stacks = sess.run(stack_batch_train)
        batch_portraits = batch_stacks[:,0,:,:,:]
        batch_landmarks = batch_stacks[:,1,:,:,:]
        # Secondary batch: independent pairs used as the shuffled stream.
        batch_stacks_secondary = sess.run(stack_batch_train_secondary)
        batch_shuffled = batch_stacks_secondary[:,0,:,:,:]
        batch_lm_shuffled = batch_stacks_secondary[:,1,:,:,:]

        # Only the "pose" objective is exercised by this loop.
        training_flag = "pose"

        feed_dict_1 = {placeholder_real_portraits_train: batch_portraits, placeholder_real_landmarks_train: batch_landmarks, placeholder_real_shuffled_train:batch_shuffled, placeholder_landmarks_shuffled_train:batch_lm_shuffled, placeholder_training_flag: training_flag}

        # One encoder step, then one discriminator step, on the same feed.
        _, recon_, adv_ = sess.run([E_train_op, E_loss_rec, E_loss_adv], feed_dict_1)
        _, d_r_, d_f_, d_g_= sess.run([D_train_op, D_loss_real, D_loss_fake, D_loss_grad], feed_dict_1)

        cur_nimg += submit_config.batch_size

        if it % 50 == 0:
            print('Iter: %06d recon_loss: %-6.4f adv_loss: %-6.4f d_r_loss: %-6.4f d_f_loss: %-6.4f d_reg: %-6.4f time:%-12s' % (
                it, recon_, adv_, d_r_, d_f_, d_g_, dnnlib.util.format_time(time.time() - start_time)))
            sys.stdout.flush()
            tflib.autosummary.save_summaries(summary_log, it)

        if it % 500 == 0:
            # Visual check: manipulate test portraits to shuffled landmarks,
            # cycle them back, and show a direct reconstruction + inversion.
            batch_stacks_test = sess.run(stack_batch_test)
            batch_portraits_test = batch_stacks_test[:,0,:,:,:]
            batch_landmarks_test = batch_stacks_test[:,1,:,:,:]

            batch_stacks_test_secondary = sess.run(stack_batch_test_secondary)
            batch_shuffled_test = batch_stacks_test_secondary[:,0,:,:,:]
            batch_shuffled_lm_test = batch_stacks_test_secondary[:,1,:,:,:]

            # Test feeds bypass process_reals, so rescale to [-1,1] manually.
            batch_portraits_test = misc.adjust_dynamic_range(batch_portraits_test.astype(np.float32), [0, 255], [-1., 1.])
            batch_landmarks_test = misc.adjust_dynamic_range(batch_landmarks_test.astype(np.float32), [0, 255], [-1., 1.])
            batch_shuffled_test = misc.adjust_dynamic_range(batch_shuffled_test.astype(np.float32), [0, 255], [-1., 1.])
            batch_shuffled_lm_test = misc.adjust_dynamic_range(batch_shuffled_lm_test.astype(np.float32), [0, 255], [-1., 1.])

            # 1) input portrait + target landmarks -> manipulated image.
            samples_manipulated = sess.run(fake_X_val, feed_dict={placeholder_real_portraits_test: batch_portraits_test, placeholder_real_landmarks_test: batch_shuffled_lm_test})
            # 2) manipulated image + original landmarks -> cycle reconstruction.
            samples_reconstructed = sess.run(fake_X_val, feed_dict={placeholder_real_portraits_test: samples_manipulated, placeholder_real_landmarks_test: batch_landmarks_test})
            # 3) direct reconstruction with the portrait's own landmarks.
            samples_direct_rec = sess.run(fake_X_val, feed_dict={placeholder_real_portraits_test: batch_portraits_test, placeholder_real_landmarks_test: batch_landmarks_test})
            # 4) plain inversion result.
            portraits_inverted = sess.run(inv_X_val, feed_dict={placeholder_real_portraits_test: batch_portraits_test, placeholder_real_landmarks_test: batch_landmarks_test})

            # Rows: landmarks, portraits, direct rec, shuffled landmarks,
            # manipulated, cycle rec, inversion.
            debug_img = np.concatenate([
                batch_landmarks_test,
                batch_portraits_test,
                samples_direct_rec,
                batch_shuffled_lm_test,
                samples_manipulated,
                samples_reconstructed,
                portraits_inverted
            ], axis=0)

            debug_img = adjust_pixel_range(debug_img)
            # NOTE(review): 7 image groups are stacked above but row=6 is
            # passed — the last group may be dropped by fuse_images; confirm.
            debug_img = fuse_images(debug_img, row=6, col=submit_config.batch_size_test)
            save_image('%s/iter_%08d.png' % (submit_config.run_dir, cur_nimg), debug_img)
            # Mirror the debug grid to the Google-Drive images folder.
            img_path = os.path.join(config.getGdrivePath(), 'images', ('iter_%08d.png' % (cur_nimg)))
            save_image(img_path, debug_img)

        # A "tick" elapses every 65000 images; snapshots are saved every
        # network_snapshot_ticks ticks, locally and to Google Drive.
        if cur_nimg >= tick_start_nimg + 65000:
            cur_tick += 1
            tick_start_nimg = cur_nimg
            if cur_tick % network_snapshot_ticks == 0:
                pkl = os.path.join(submit_config.run_dir, 'network-snapshot-%08d.pkl' % (cur_nimg))
                misc.save_pkl((E, G, D, Gs), pkl)
                pkl_drive = os.path.join(config.getGdrivePath(), 'snapshots', 'network-snapshot-%08d.pkl' % (cur_nimg))
                misc.save_pkl((E, G, D, Gs), pkl_drive)

    misc.save_pkl((E, G, D, Gs), os.path.join(submit_config.run_dir, 'network-final.pkl'))
    summary_log.close()
def encode(_target_image, _context_image, _output_dir):
    """Diffuse a target face crop into a context image via latent optimization.

    Loads the StyleGAN inversion model, stitches the (white-background
    cropped) target face into the context image, encodes the stitched image
    to an initial latent code, then optimizes the code so the masked crop
    region is reproduced both pixel-wise and perceptually. Intermediate
    images (`*_tar.png`, `*_cont.png`, `*_cont_inv.png`, `*_sti.png`) and
    the final result (`<output_dir>.png`) are written under `./static/`.

    Args:
        _target_image: filename of the target image under `./static/`.
        _context_image: identifier resolved by `getContextImage`.
        _output_dir: output name; its last 4 chars (extension) are stripped.

    Returns:
        Path of the final diffused image (`'./static/<name>.png'`).

    Fixes vs. original: the trailing `exit()` (which terminated the whole
    process) is replaced by a normal return; the builtin-shadowing local
    `input` is renamed; dead code (unused `col_idx`, discarded pre-loop
    encoder run, commented-out stitching variants) is removed.
    """
    gpu_id = '0'
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu_id

    print(_target_image)
    assert os.path.exists('./static/' + _target_image)

    # Strip the 4-char extension (e.g. ".png") to build the output stem.
    _output_dir = _output_dir[:-4]
    output_dir = './static/' + _output_dir

    tflib.init_tf({'rnd.np_random_seed': 1000})
    model_path = './styleganinv_face_256.pkl'
    with open(model_path, 'rb') as f:
        E, _, _, Gs = pickle.load(f)

    # Input resolution is taken from the encoder; images must be square.
    image_size = E.input_shape[2]
    assert image_size == E.input_shape[3]

    # Fixed face-crop window; the mask is 1 inside the crop, 0 elsewhere,
    # in NHWC layout so it broadcasts against the transposed tensors below.
    crop_size = 110
    center_x = 125
    center_y = 145
    crop_x = center_x - crop_size // 2
    crop_y = center_y - crop_size // 2
    mask = np.zeros((1, image_size, image_size, 3), dtype=np.float32)
    mask[:, crop_y:crop_y + crop_size, crop_x:crop_x + crop_size, :] = 1.0

    # Build graph.
    sess = tf.get_default_session()
    batch_size = 4
    input_shape = E.input_shape  # NOTE: mutates E.input_shape's batch entry in place.
    input_shape[0] = batch_size
    x = tf.placeholder(tf.float32, shape=input_shape, name='real_image')
    # Masked image in [-1,1] (outside the crop everything collapses to -1),
    # and its [0,255] NHWC version for the perceptual network.
    x_mask = (tf.transpose(x, [0, 2, 3, 1]) + 1) * mask - 1
    x_mask_255 = (x_mask + 1) / 2 * 255

    latent_shape = Gs.components.synthesis.input_shape
    latent_shape[0] = batch_size
    # The latent code is the optimization variable.
    wp = tf.get_variable(shape=latent_shape, name='latent_code')
    x_rec = Gs.components.synthesis.get_output_for(wp, randomize_noise=False)
    x_rec_mask = (tf.transpose(x_rec, [0, 2, 3, 1]) + 1) * mask - 1
    x_rec_mask_255 = (x_rec_mask + 1) / 2 * 255

    # Encoder output initializes wp via `setter`.
    w_enc = E.get_output_for(x, phase=False)
    wp_enc = tf.reshape(w_enc, latent_shape)
    setter = tf.assign(wp, wp_enc)

    # Masked pixel + perceptual reconstruction loss, optimized over wp only.
    print("Diffusion : Settings for Optimization.")
    perceptual_model = PerceptualModel([image_size, image_size], False)
    x_feat = perceptual_model(x_mask_255)
    x_rec_feat = perceptual_model(x_rec_mask_255)
    loss_feat = tf.reduce_mean(tf.square(x_feat - x_rec_feat), axis=[1])
    loss_pix = tf.reduce_mean(tf.square(x_mask - x_rec_mask), axis=[1, 2, 3])

    loss_weight_feat = 5e-5  # perceptual-loss scale
    learning_rate = 0.01
    loss = loss_pix + loss_weight_feat * loss_feat
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss, var_list=[wp])
    tflib.init_uninitialized_vars()

    num_iterations = 100
    num_results = 5
    save_interval = num_iterations // num_results
    images = np.zeros(input_shape, np.uint8)

    print("Load target image.")
    _target_image = './static/' + _target_image
    target_image = resize_image(load_image(_target_image), (image_size, image_size))
    save_image('./' + output_dir + '_tar.png', target_image)

    print("Load context image.")
    context_image = getContextImage(_context_image)
    context_image = resize_image(load_image(context_image), (image_size, image_size))
    save_image('./' + output_dir + '_cont.png', context_image)

    # NOTE(review): context inversion is disabled upstream, so this saves the
    # raw context image under the "_cont_inv" name — confirm that is intended.
    save_image('./' + output_dir + '_cont_inv.png', context_image)

    # Cut the target face out of its white background and stitch it into the
    # context image at the crop rectangle.
    print("Cropping Image...")
    target_image, rect = cropWithWhite(target_image)
    target_image = fourChannels(target_image)
    target_image = cut(target_image)
    target_image = transBg(target_image)
    context_image = createStitchedImage(context_image, target_image, rect)
    save_image('./' + output_dir + '_sti.png', context_image)

    # Only slot 0 of the batch carries real data; scale to [-1,1] NCHW.
    images[0] = np.transpose(context_image, [2, 0, 1])
    inputs = images.astype(np.float32) / 255 * 2.0 - 1.0

    # Initialize wp from the encoder, then optimize.
    print("Start Diffusion.")
    sess.run([setter], {x: inputs})
    result_path = f'{output_dir}.png'
    for step in tqdm(range(1, num_iterations + 1), leave=False):
        sess.run(train_op, {x: inputs})
        if step == num_iterations or step % save_interval == 0:
            output = sess.run([wp, x_rec])
            output[1] = adjust_pixel_range(output[1])
            if step == num_iterations:
                save_image(result_path, output[1][0])
    return result_path
def invert(model_path, _image, _wp, _latent_shape,
           num_iterations=40, num_results=2, batch_size=4,
           learning_rate=0.01, loss_weight_feat=5e-5, loss_weight_enc=2.0):
    """Invert an image into the latent variable `_wp` via optimization.

    Loads the encoder/generator pair from `model_path`, initializes `_wp`
    from the encoder's output for `_image`, then minimizes a pixel +
    perceptual + encoder-consistency loss over `_wp` with Adam.

    Args:
        model_path: path to the pickled (E, _, _, Gs) model.
        _image: path of the image to invert.
        _wp: existing TF variable holding the latent code to optimize.
        _latent_shape: shape the encoder output is reshaped to.
        num_iterations: optimization steps (was hard-coded 40).
        num_results: number of intermediate snapshots (was hard-coded 2).
        batch_size: placeholder batch size (was hard-coded 4).
        learning_rate: Adam learning rate (was hard-coded 0.01).
        loss_weight_feat: perceptual-loss weight (was hard-coded 5e-5).
        loss_weight_enc: encoder-consistency weight (was hard-coded 2.0).

    Returns:
        The reconstructed image (pixel range adjusted) after the final step;
        if the loop produces no final snapshot, the resized input image.
    """
    print("Inverting")
    tflib.init_tf({'rnd.np_random_seed': 1000})
    with open(model_path, 'rb') as f:
        E, _, _, Gs = pickle.load(f)

    # Encoder input must be square; its resolution drives all resizing.
    image_size = E.input_shape[2]
    assert image_size == E.input_shape[3]

    # Build graph.
    print("Inverting : Build Graph.")
    sess = tf.get_default_session()
    input_shape = E.input_shape
    input_shape[0] = batch_size
    x = tf.placeholder(tf.float32, shape=input_shape, name='real_image')
    x_255 = (tf.transpose(x, [0, 2, 3, 1]) + 1) / 2 * 255

    wp = _wp
    x_rec = Gs.components.synthesis.get_output_for(wp, randomize_noise=False)
    x_rec_255 = (tf.transpose(x_rec, [0, 2, 3, 1]) + 1) / 2 * 255

    # Encoder output initializes the latent variable via `setter`.
    w_enc = E.get_output_for(x, phase=False)
    wp_enc = tf.reshape(w_enc, _latent_shape)
    setter = tf.assign(wp, wp_enc)

    # Pixel + perceptual + encoder-consistency loss, optimized over wp only.
    print("Inverting : Settings for Optimization.")
    perceptual_model = PerceptualModel([image_size, image_size], False)
    x_feat = perceptual_model(x_255)
    x_rec_feat = perceptual_model(x_rec_255)
    loss_feat = tf.reduce_mean(tf.square(x_feat - x_rec_feat), axis=[1])
    loss_pix = tf.reduce_mean(tf.square(x - x_rec), axis=[1, 2, 3])
    # Domain regularizer: re-encoding the reconstruction should return wp.
    w_enc_new = E.get_output_for(x_rec, phase=False)
    wp_enc_new = tf.reshape(w_enc_new, _latent_shape)
    loss_enc = tf.reduce_mean(tf.square(wp - wp_enc_new), axis=[1, 2])
    loss = (loss_pix
            + loss_weight_feat * loss_feat
            + loss_weight_enc * loss_enc)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss, var_list=[wp])
    tflib.init_uninitialized_vars()

    # Invert image.
    print("Start Inverting.")
    # Guard against num_iterations < num_results, which would make the
    # interval 0 and crash the modulo below (original hard-coded 40 // 2).
    save_interval = max(1, num_iterations // num_results)
    context_images = np.zeros(input_shape, np.uint8)
    context_image = resize_image(load_image(_image), (image_size, image_size))

    # Only slot 0 of the batch carries real data; scale to [-1,1] NCHW.
    context_images[0] = np.transpose(context_image, [2, 0, 1])
    context_input = context_images.astype(np.float32) / 255 * 2.0 - 1.0

    # Initialize wp from the encoder, keep a fallback return image.
    sess.run([setter], {x: context_input})
    context_output = sess.run([wp, x_rec])
    context_output[1] = adjust_pixel_range(context_output[1])
    context_image = np.transpose(context_images[0], [1, 2, 0])

    for step in tqdm(range(1, num_iterations + 1), leave=False):
        sess.run(train_op, {x: context_input})
        if step == num_iterations or step % save_interval == 0:
            context_output = sess.run([wp, x_rec])
            context_output[1] = adjust_pixel_range(context_output[1])
            if step == num_iterations:
                context_image = context_output[1][0]
    return context_image
def training_loop(
        submit_config,
        Encoder_args={},
        D_args={},
        G_args={},
        E_opt_args={},
        D_opt_args={},
        E_loss_args=EasyDict(),
        D_loss_args={},
        lr_args=EasyDict(),
        tf_config={},
        dataset_args=EasyDict(),
        decoder_pkl=EasyDict(),
        drange_data=[0, 255],
        drange_net=[
            -1, 1
        ],  # Dynamic range used when feeding image data to the networks.
        mirror_augment=False,
        filter=64,  # Minimum number of feature maps in any layer. (NOTE(review): shadows builtin `filter`.)
        filter_max=512,  # Maximum number of feature maps in any layer.
        resume_run_id=None,  # Run ID or network pkl to resume training from, None = start from scratch.
        resume_snapshot=None,  # Snapshot index to resume training from, None = autodetect.
        image_snapshot_ticks=1,  # How often to export image snapshots?
        network_snapshot_ticks=10,  # How often to export network snapshots?
        d_scale=0.1,  # Decide whether to update discriminator.
        pretrained_D=True,  # Whether to use pre trained Discriminator.
        max_iters=150000):
    """Train an image encoder E against a (optionally frozen) discriminator.

    Builds a multi-GPU TF1 graph over single-image placeholders, copies the
    pre-trained synthesis weights into a style-modulated generator clone
    (`G_StyleMod`), optimizes E (and D when `d_scale > 0`) under a decayed
    learning rate, and periodically writes reconstruction grids and network
    snapshots into `submit_config.run_dir`, ending with `network-final.pkl`.
    """
    tflib.init_tf(tf_config)
    # Train/test image placeholders (NCHW); train batch is split across GPUs.
    with tf.name_scope('Input'):
        real_train = tf.placeholder(tf.float32, [
            submit_config.batch_size, 3, submit_config.image_size,
            submit_config.image_size
        ], name='real_image_train')
        real_test = tf.placeholder(tf.float32, [
            submit_config.batch_size_test, 3, submit_config.image_size,
            submit_config.image_size
        ], name='real_image_test')
        real_split = tf.split(real_train,
                              num_or_size_splits=submit_config.num_gpus,
                              axis=0)
    with tf.device('/gpu:0'):
        if resume_run_id is not None:
            # Resume: restore E/G/D/Gs and derive the starting iteration
            # from the image count encoded in the snapshot filename.
            network_pkl = misc.locate_network_pkl(resume_run_id,
                                                  resume_snapshot)
            print('Loading networks from "%s"...' % network_pkl)
            E, G, D, Gs = misc.load_pkl(network_pkl)
            G_style_mod = tflib.Network('G_StyleMod',
                                        resolution=submit_config.image_size,
                                        label_size=0,
                                        **G_args)
            start = int(network_pkl.split('-')[-1].split('.')
                        [0]) // submit_config.batch_size
            print('Start: ', start)
        else:
            # Fresh start: load the pre-trained decoder (and its
            # discriminator PreD), then build E, a fresh discriminator, and
            # the style-modulated generator.
            print('Constructing networks...')
            G, PreD, Gs = misc.load_pkl(decoder_pkl.decoder_pkl)
            num_layers = Gs.components.synthesis.input_shape[1]
            E = tflib.Network('E_gpu0',
                              size=submit_config.image_size,
                              filter=filter,
                              filter_max=filter_max,
                              num_layers=num_layers,
                              is_training=True,
                              num_gpus=submit_config.num_gpus,
                              **Encoder_args)
            OriD = tflib.Network('D_ori',
                                 resolution=submit_config.image_size,
                                 label_size=0,
                                 **D_args)
            G_style_mod = tflib.Network('G_StyleMod',
                                        resolution=submit_config.image_size,
                                        label_size=0,
                                        **G_args)
            # Choose between the decoder's pre-trained D and the fresh one.
            if pretrained_D:
                D = PreD
            else:
                D = OriD
            start = 0
        # Copy the pre-trained synthesis weights into G_StyleMod, matching
        # variables by name.
        Gs_vars_pairs = {
            name: tflib.run(val)
            for name, val in Gs.components.synthesis.vars.items()
        }
        for g_name, g_val in G_style_mod.vars.items():
            tflib.set_vars({g_val: Gs_vars_pairs[g_name]})

    E.print_layers()
    Gs.print_layers()
    D.print_layers()

    # Shared decayed learning rate; add_global0 advances the decay step and
    # is attached as a control dependency of gradient registration below.
    global_step0 = tf.Variable(start,
                               trainable=False,
                               name='learning_rate_step')
    learning_rate = tf.train.exponential_decay(lr_args.learning_rate,
                                               global_step0,
                                               lr_args.decay_step,
                                               lr_args.decay_rate,
                                               staircase=lr_args.stair)
    add_global0 = global_step0.assign_add(1)

    E_opt = tflib.Optimizer(name='TrainE',
                            learning_rate=learning_rate,
                            **E_opt_args)
    # The discriminator is only trained when d_scale > 0.
    if d_scale > 0:
        D_opt = tflib.Optimizer(name='TrainD',
                                learning_rate=learning_rate,
                                **D_opt_args)

    # Loss accumulators, summed per GPU and averaged afterwards.
    E_loss_rec = 0.
    E_loss_adv = 0.
    D_loss_real = 0.
    D_loss_fake = 0.
    D_loss_grad = 0.
    for gpu in range(submit_config.num_gpus):
        print('Building Graph on GPU %s' % str(gpu))
        with tf.name_scope('GPU%d' % gpu), tf.device('/gpu:%d' % gpu):
            # GPU 0 uses the master networks; E clones are named E_gpu<N>,
            # D/G clones get a "_shadow" suffix.
            E_gpu = E if gpu == 0 else E.clone(E.name[:-1] + str(gpu))
            D_gpu = D if gpu == 0 else D.clone(D.name + '_shadow')
            G_gpu = G_style_mod if gpu == 0 else G_style_mod.clone(
                G_style_mod.name + '_shadow')
            feature_model = PerceptualModel(img_size=[
                E_loss_args.perceptual_img_size,
                E_loss_args.perceptual_img_size
            ], multi_layers=False)
            # Map [0,255] data into the network range [-1,1].
            real_gpu = process_reals(real_split[gpu], mirror_augment,
                                     drange_data, drange_net)
            # Loss functions are resolved by name from the *_loss_args
            # (dnnlib convention: func_name is carried inside the kwargs).
            with tf.name_scope('E_loss'), tf.control_dependencies(None):
                E_loss, recon_loss, adv_loss = dnnlib.util.call_func_by_name(
                    E=E_gpu,
                    G=G_gpu,
                    D=D_gpu,
                    feature_model=feature_model,
                    reals=real_gpu,
                    **E_loss_args)
                E_loss_rec += recon_loss
                E_loss_adv += adv_loss
            with tf.name_scope('D_loss'), tf.control_dependencies(None):
                D_loss, loss_fake, loss_real, loss_gp = dnnlib.util.call_func_by_name(
                    E=E_gpu, G=G_gpu, D=D_gpu, reals=real_gpu, **D_loss_args)
                D_loss_real += loss_real
                D_loss_fake += loss_fake
                D_loss_grad += loss_gp
            # Registering gradients under this dependency bumps the decay
            # step once per training step.
            with tf.control_dependencies([add_global0]):
                E_opt.register_gradients(E_loss, E_gpu.trainables)
                if d_scale > 0:
                    D_opt.register_gradients(D_loss, D_gpu.trainables)

    # Average the per-GPU loss sums for reporting.
    E_loss_rec /= submit_config.num_gpus
    E_loss_adv /= submit_config.num_gpus
    D_loss_real /= submit_config.num_gpus
    D_loss_fake /= submit_config.num_gpus
    D_loss_grad /= submit_config.num_gpus

    E_train_op = E_opt.apply_updates()
    if d_scale > 0:
        D_train_op = D_opt.apply_updates()

    print('Building testing graph...')
    # Reconstruction of the test batch through E + G_StyleMod.
    fake_X_val = test(E, G_style_mod, real_test, submit_config)

    sess = tf.get_default_session()

    print('Getting training data...')
    image_batch_train = get_train_data(sess,
                                       data_dir=dataset_args.data_train,
                                       submit_config=submit_config,
                                       mode='train')
    image_batch_test = get_train_data(sess,
                                      data_dir=dataset_args.data_test,
                                      submit_config=submit_config,
                                      mode='test')

    summary_log = tf.summary.FileWriter(submit_config.run_dir)

    cur_nimg = start * submit_config.batch_size
    cur_tick = 0
    tick_start_nimg = cur_nimg
    start_time = time.time()

    print('Optimization starts!!!')
    for it in range(start, max_iters):
        batch_images = sess.run(image_batch_train)
        feed_dict = {real_train: batch_images}
        # One encoder step (also fetching the current lr for logging), then
        # optionally one discriminator step on the same batch.
        _, recon_, adv_, lr = sess.run(
            [E_train_op, E_loss_rec, E_loss_adv, learning_rate], feed_dict)
        if d_scale > 0:
            _, d_r_, d_f_, d_g_ = sess.run(
                [D_train_op, D_loss_real, D_loss_fake, D_loss_grad],
                feed_dict)
        cur_nimg += submit_config.batch_size
        run_time = time.time() - start_time
        iter_time = run_time / (it - start + 1)
        eta_time = iter_time * (max_iters - it - 1)

        if it % 50 == 0:
            print(
                'Iter: %06d/%d, lr: %-.8f recon_loss: %-6.4f adv_loss: %-6.4f run_time:%-12s eta_time:%-12s'
                % (it, max_iters, lr, recon_, adv_,
                   dnnlib.util.format_time(time.time() - start_time),
                   dnnlib.util.format_time(eta_time)))
            # d_r_/d_f_/d_g_ only exist when d_scale > 0; the guard keeps
            # this print safe.
            if d_scale > 0:
                print('d_r_loss: %-6.4f d_f_loss: %-6.4f d_reg: %-6.4f ' %
                      (d_r_, d_f_, d_g_))
            sys.stdout.flush()
            tflib.autosummary.save_summaries(summary_log, it)

        # A "tick" elapses every 65000 images; image and network snapshots
        # fire on their respective tick intervals.
        if cur_nimg >= tick_start_nimg + 65000:
            cur_tick += 1
            tick_start_nimg = cur_nimg
            if cur_tick % image_snapshot_ticks == 0:
                batch_images_test = sess.run(image_batch_test)
                # Test feeds bypass process_reals, so rescale manually.
                batch_images_test = misc.adjust_dynamic_range(
                    batch_images_test.astype(np.float32), [0, 255],
                    [-1., 1.])
                recon = sess.run(fake_X_val,
                                 feed_dict={real_test: batch_images_test})
                orin_recon = np.concatenate([batch_images_test, recon],
                                            axis=0)
                orin_recon = adjust_pixel_range(orin_recon)
                orin_recon = fuse_images(orin_recon,
                                         row=2,
                                         col=submit_config.batch_size_test)
                # save image results during training, first row is original
                # images and the second row is reconstructed images
                save_image(
                    '%s/iter_%09d.png' % (submit_config.run_dir, cur_nimg),
                    orin_recon)
            if cur_tick % network_snapshot_ticks == 0:
                pkl = os.path.join(submit_config.run_dir,
                                   'network-snapshot-%09d.pkl' % (cur_nimg))
                misc.save_pkl((E, G, D, Gs), pkl)

    misc.save_pkl((E, G, D, Gs),
                  os.path.join(submit_config.run_dir, 'network-final.pkl'))
    summary_log.close()