def main2(seed):
    tflib.init_tf()
    #_G, _D, Gs = pickle.load(open("karras2019stylegan-ffhq-1024x1024.pkl", "rb"))
    _G, _D, Gs = pretrained_networks.load_networks("dummy")
    generator = Generator(Gs, batch_size=1, randomize_noise=False)
    Gs.print_layers()
    rnd = np.random.RandomState(None)
    fmt = dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True)
    synthesis_kwargs = dict(output_transform=fmt, truncation_psi=0.7, minibatch_size=8)
    vectors = gen_img_with_18_512(Gs, fmt, rnd, dst_seeds=seed)
    np.save(os.path.join(result_dir, 'test_changed/0-original.npy'), vectors)
    create_order_npy("0-original.npy", vectors)

    # Load all direction vectors
    direction_vectors = "D:/Projects/training_datasets/emotions/style2/*.npy"
    dataset = glob.glob(direction_vectors)
    dataset = natural_sort(dataset)
    for npy in dataset:
        print(npy)
        file_name = os.path.basename(npy)
        file_name_no_extension = os.path.splitext(file_name)[0]
        print(file_name_no_extension)
        #vectors = create_full(vectors, npy, file_name_no_extension, Gs, generator)
        create_all(vectors, npy, file_name_no_extension, Gs, generator, cof)
def generate_image(latent_vector):
    generator = Generator(Gs_network, batch_size=1, randomize_noise=False)
    latent_vector = latent_vector.reshape((1, 18, 512))
    generator.set_dlatents(latent_vector)
    img_array = generator.generate_images()[0]
    img = PIL.Image.fromarray(img_array, 'RGB')
    return img.resize((256, 256))
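# Usage sketch (not from the original sources): generate_image() above expects
# an (18, 512) dlatent and a Gs_network already loaded in the enclosing scope;
# the .npy path below is hypothetical.
import numpy as np

latent = np.load('latent_representations/person_01.npy')  # shape (18, 512)
preview = generate_image(latent)
preview.save('person_01_preview.png')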
def init_dependencies():
    tfl.init_tf()
    landmarks_model_path = unpack_bz2(
        get_file('shape_predictor_68_face_landmarks.dat.bz2',
                 LANDMARKS_MODEL_URL, cache_subdir='cache'))
    landmarks_detector = LandmarksDetector(landmarks_model_path)
    ff_model = None  # stays None when no ResNet checkpoint is available
    if os.path.exists(args['load_resnet']):
        print("Loading ResNet Model:")
        ff_model = load_model(args['load_resnet'])
    with open(args['model_dir'], 'rb') as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)
    generator = Generator(Gs_network, args['batch_size'],
                          clipping_threshold=args['clipping_threshold'],
                          tiled_dlatent=args['tile_dlatents'],
                          model_res=args['model_res'],
                          randomize_noise=args['randomize_noise'])
    perceptual_model = PerceptualModel(args, perc_model=None, batch_size=args['batch_size'])
    perceptual_model.build_perceptual_model(generator, discriminator_network)
    return landmarks_detector, ff_model, generator, perceptual_model
def mix_pic(npy1, npy2, psi=0.5, begin=0, end=8):
    os.makedirs(config.generated_dir, exist_ok=True)
    print("Loading image vector 1...")
    img_src1 = np.load(os.path.join(config.src_latents_dir, npy1))
    print("Loading image vector 2...")
    img_src2 = np.load(os.path.join(config.src_latents_dir, npy2))
    print("Mixing latent vectors...")
    tmp_vec = img_src1 * psi + img_src2 * (1 - psi)
    new_latent_vector = tmp_vec.reshape((1, 18, 512))
    tflib.init_tf()
    Gs_network = load_Gs(Model)
    global generator
    generator = Generator(Gs_network, batch_size=1, randomize_noise=False)
    generator.set_dlatents(new_latent_vector)
    print("Generating image...")
    new_person_image = generator.generate_images()[0]
    canvas = PIL.Image.new('RGB', (1024, 1024), 'white')
    temp_img = PIL.Image.fromarray(new_person_image, 'RGB')
    filename = npy1[:8] + '_' + npy2[4:8] + 'mixed.png'
    print("Saving mixed image...")
    canvas.paste(temp_img, (0, 0))
    canvas.save(os.path.join(config.generated_dir, filename))
    npy_file = os.path.join(config.src_latents_dir, filename[:-4] + '.npy')
    np.save(npy_file, new_latent_vector)
    print("Done!")
    return resizeImg(temp_img)
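# Usage sketch (assumption): mix_pic() blends two stored dlatents with the
# linear interpolation psi * v1 + (1 - psi) * v2; note that its begin/end
# parameters are accepted but never used in the body above. File names below
# are hypothetical.
mixed_preview = mix_pic('person_a.npy', 'person_b.npy', psi=0.5)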
def main():
    parser = argparse.ArgumentParser(description='Find latent representation of reference images using perceptual loss')
    parser.add_argument('src_dir', help='Directory with images for encoding')
    parser.add_argument('generated_images_dir', help='Directory for storing generated images')
    parser.add_argument('dlatent_dir', help='Directory for storing dlatent representations')

    # for now it's unclear if larger batch leads to better performance/quality
    parser.add_argument('--batch_size', default=1, help='Batch size for generator and perceptual model', type=int)

    # Perceptual model params
    parser.add_argument('--image_size', default=256, help='Size of images for perceptual model', type=int)
    parser.add_argument('--lr', default=1., help='Learning rate for perceptual model', type=float)
    parser.add_argument('--iterations', default=1000, help='Number of optimization steps for each batch', type=int)

    # Generator params
    parser.add_argument('--randomize_noise', default=False, help='Add noise to dlatents during optimization', type=bool)
    args, other_args = parser.parse_known_args()

    ref_image = [os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir)]
    ref_image = list(filter(os.path.isfile, ref_image))
    already_processed = set(map(lambda x: x.split(".")[0], os.listdir(args.generated_images_dir)))
    ref_images = list(filter(lambda x: x.split("/")[-1].split(".")[0] not in already_processed, ref_image))
    if len(ref_images) == 0:
        raise Exception('%s is empty' % args.src_dir)

    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    with dnnlib.util.open_url(URL_FFHQ, cache_dir=config.cache_dir) as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)

    generator = Generator(Gs_network, args.batch_size, randomize_noise=args.randomize_noise)
    perceptual_model = PerceptualModel(args.image_size, layer=9, batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator.generated_image)

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size), total=len(ref_images)//args.batch_size):
        names = [os.path.splitext(os.path.basename(x))[0] for x in images_batch]

        perceptual_model.set_reference_images(images_batch)
        op = perceptual_model.optimize(generator.dlatent_variable, iterations=args.iterations, learning_rate=args.lr)
        pbar = tqdm(op, leave=False, total=args.iterations)
        for loss in pbar:
            pbar.set_description(' '.join(names)+' Loss: %.2f' % loss)
        print(' '.join(names), ' loss:', loss)

        # Generate images from found dlatents and save them
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_name in zip(generated_images, generated_dlatents, names):
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(os.path.join(args.generated_images_dir, f'{img_name}.png'), 'PNG')
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)

        generator.reset_dlatents()
def main():
    # Initialize TensorFlow
    tflib.init_tf()
    # Load the pre-trained model
    Gs_network = load_Gs(Model)
    generator = Generator(Gs_network, batch_size=1, randomize_noise=False)

    # Load the latent for a real face, used as the base for the edits below;
    # qing_01.npy can be replaced with your own file name
    os.makedirs(config.dlatents_dir, exist_ok=True)
    person = np.load(os.path.join(config.dlatents_dir, 'qing_01.npy'))

    # Load the pre-trained direction vectors for editing facial attributes/expressions,
    # including age, horizontal head angle, gender, eye openness, glasses, and smile
    age_direction = np.load('ffhq_dataset/latent_directions/age.npy')
    angle_direction = np.load('ffhq_dataset/latent_directions/angle_horizontal.npy')
    gender_direction = np.load('ffhq_dataset/latent_directions/gender.npy')
    eyes_direction = np.load('ffhq_dataset/latent_directions/eyes_open.npy')
    glasses_direction = np.load('ffhq_dataset/latent_directions/glasses.npy')
    smile_direction = np.load('ffhq_dataset/latent_directions/smile.npy')

    # Mix the face latent with each direction vector and render the edited images
    move_and_show(generator, 0, person, age_direction, [-6, -4, -3, -2, 0, 2, 3, 4, 6])
    move_and_show(generator, 1, person, angle_direction, [-6, -4, -3, -2, 0, 2, 3, 4, 6])
    move_and_show(generator, 2, person, gender_direction, [-6, -4, -3, -2, 0, 2, 3, 4, 6])
    move_and_show(generator, 3, person, eyes_direction, [-3, -2, -1, -0.5, 0, 0.5, 1, 2, 3])
    move_and_show(generator, 4, person, glasses_direction, [-6, -4, -3, -2, 0, 2, 3, 4, 6])
    move_and_show(generator, 5, person, smile_direction, [-3, -2, -1, -0.5, 0, 0.5, 1, 2, 3])
def setup():
    tflib.init_tf()
    with dnnlib.util.open_url(URL_FFHQ) as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)
    generator = Generator(Gs_network, 1, randomize_noise=False)
    perceptual_model = PerceptualModel(256, layer=9, batch_size=1)
    perceptual_model.build_perceptual_model(generator.generated_image)
    return perceptual_model, generator
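# Usage sketch (assumption): the typical encode loop for the pair returned by
# setup(), mirroring the optimization code elsewhere in this collection; the
# image path is hypothetical.
perceptual_model, generator = setup()
perceptual_model.set_reference_images(['aligned_images/person_01.png'])
for loss in perceptual_model.optimize(generator.dlatent_variable,
                                      iterations=200, learning_rate=1.):
    print('loss: %.2f' % loss)
img_array = generator.generate_images()[0]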
def encode_images(src_dir, generated_images_dir, dlatent_dir, Gs_network,
                  batch_size=1, image_size=256, lr=1., iterations=1000,
                  randomize_noise=False):
    ref_images = [os.path.join(src_dir, x) for x in os.listdir(src_dir)]
    ref_images = list(filter(os.path.isfile, ref_images))
    if len(ref_images) == 0:
        raise Exception('%s is empty' % src_dir)

    os.makedirs(generated_images_dir, exist_ok=True)
    os.makedirs(dlatent_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    generator = Generator(Gs_network, batch_size, randomize_noise=randomize_noise)
    perceptual_model = PerceptualModel(image_size, layer=9, batch_size=batch_size)
    perceptual_model.build_perceptual_model(generator.generated_image)

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, batch_size), total=len(ref_images) // batch_size):
        names = [os.path.splitext(os.path.basename(x))[0] for x in images_batch]

        perceptual_model.set_reference_images(images_batch)
        op = perceptual_model.optimize(generator.dlatent_variable, iterations=iterations, learning_rate=lr)
        pbar = tqdm(op, leave=False, total=iterations)
        for loss in pbar:
            pbar.set_description(' '.join(names) + ' Loss: %.2f' % loss)
        print(' '.join(names), ' loss:', loss)

        # Generate images from found dlatents and save them
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_name in zip(generated_images, generated_dlatents, names):
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(os.path.join(generated_images_dir, f'{img_name}.png'), 'PNG')
            np.save(os.path.join(dlatent_dir, f'{img_name}.npy'), dlatent)

        generator.reset_dlatents()
    return generated_dlatents
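# Usage sketch (assumption): encode_images() takes an already-loaded Gs_network,
# unlike the variants above that load the pickle themselves. Paths below are
# hypothetical.
import pickle

tflib.init_tf()
with open('karras2019stylegan-ffhq-1024x1024.pkl', 'rb') as f:
    _G, _D, Gs_network = pickle.load(f)
dlatents = encode_images('aligned_images', 'generated_images',
                         'latent_representations', Gs_network, iterations=500)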
def initUtils(self):
    tflib.init_tf()
    with dnnlib.util.open_url(stylegan_utils.URL_FFHQ, cache_dir=config.cache_dir) as f:
        generator_network, discriminator_network, Gs = pickle.load(f)
    self.generator = Generator(Gs, batch_size=1, randomize_noise=False)
    self.Gs = Gs
    return self.generator, self.Gs
def main():
    # Initialize TensorFlow
    tflib.init_tf()
    # Load the pre-trained model
    Gs_network = load_Gs(Model)
    generator = Generator(Gs_network, batch_size=1, randomize_noise=False)

    # Load the latent for a real face, used as the base for the edits;
    # 1_01_1.npy can be replaced with your own file name
    os.makedirs(config.dlatents_dir, exist_ok=True)
    target = np.load(os.path.join(config.dlatents_dir, '1_01_1.npy'))
def ImageFromVec(npy):
    img_src = np.load(os.path.join(config.src_latents_dir, npy))
    tflib.init_tf()
    Gs_network = load_Gs(Model)
    global generator
    generator = Generator(Gs_network, batch_size=1, randomize_noise=False)
    generator.set_dlatents(img_src)
    print("Generating image...")
    new_image = generator.generate_images()[0]
    temp_img = PIL.Image.fromarray(new_image, 'RGB')
    return resizeImg(temp_img)
def setup():
    tflib.init_tf()
    # Load pre-trained network.
    url = 'https://drive.google.com/uc?id=1MEGjdvVpUsu1jB4zrXZN7Y4kBBOzizDQ'  # karras2019stylegan-ffhq-1024x1024.pkl
    with dnnlib.util.open_url(url, cache_dir=config.cache_dir) as f:
        _G, _D, Gs = pickle.load(f)
    generator = Generator(Gs, batch_size=1, randomize_noise=False)  # -- RUNNING >1 TIMES THROWS ERROR
    fmt = dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True)
    return [_G, _D, Gs, generator, fmt]
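# The comment above notes that calling setup() a second time throws an error,
# most likely because the Generator is rebuilt in the same TensorFlow graph and
# collides with the variables created the first time. A minimal workaround
# sketch (an assumption, not code from the original source) is to memoize the
# result:
_setup_result = None

def setup_once():
    global _setup_result
    if _setup_result is None:
        _setup_result = setup()
    return _setup_result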
def encode(self, src_dir, generated_images_dir, dlatent_dir, batch_size=1,
           image_size=256, lr=1, iterations=1000, randomize_noise=False):
    """
    Find latent representation of reference images using perceptual loss

    Params:
        src_dir: Directory with images for encoding
        generated_images_dir: Directory for storing generated images
        dlatent_dir: Directory for storing dlatent representations
        batch_size: Batch size for generator and perceptual model
        image_size: Size of images for perceptual model
        lr: Learning rate for perceptual model
        iterations: Number of optimization steps for each batch
        randomize_noise: Add noise to dlatents during optimization
    """
    ref_images = [os.path.join(src_dir, x) for x in os.listdir(src_dir)]
    ref_images = list(filter(os.path.isfile, ref_images))
    if len(ref_images) == 0:
        raise Exception('%s is empty' % src_dir)

    os.makedirs(generated_images_dir, exist_ok=True)
    os.makedirs(dlatent_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    with dnnlib.util.open_url(self.URL_FFHQ, cache_dir=config.cache_dir) as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)

    generator = Generator(Gs_network, batch_size, randomize_noise=randomize_noise)
    perceptual_model = PerceptualModel(image_size, layer=9, batch_size=batch_size)
    perceptual_model.build_perceptual_model(generator.generated_image)

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(self.split_to_batches(ref_images, batch_size), total=len(ref_images)//batch_size):
        names = [os.path.splitext(os.path.basename(x))[0] for x in images_batch]

        perceptual_model.set_reference_images(images_batch)
        op = perceptual_model.optimize(generator.dlatent_variable, iterations=iterations, learning_rate=lr)
        pbar = tqdm(op, leave=False, total=iterations)
        for loss in pbar:
            pbar.set_description(' '.join(names)+' Loss: %.2f' % loss)
        print(' '.join(names), ' loss:', loss)

        # Generate images from found dlatents and save them
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_name in zip(generated_images, generated_dlatents, names):
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(os.path.join(generated_images_dir, f'{img_name}.png'), 'PNG')
            np.save(os.path.join(dlatent_dir, f'{img_name}.npy'), dlatent)

        generator.reset_dlatents()
def generate_image(latent_vector):
    from encoder.generator_model import Generator
    import dnnlib
    import dnnlib.tflib as tflib

    tflib.init_tf()
    with dnnlib.util.open_url(URL_FFHQ, cache_dir=config.cache_dir) as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)
    generator = Generator(Gs_network, batch_size=1, randomize_noise=False)

    latent_vector = latent_vector.reshape((1, 18, 512))
    generator.set_dlatents(latent_vector)
    img_array = generator.generate_images()[0]
    img = PIL.Image.fromarray(img_array, 'RGB')
    return img.resize((512, 512))
def setup(opts):
    # Initialize generator and perceptual model
    global perceptual_model
    global generator
    tflib.init_tf()
    model = opts['checkpoint']
    print("open model %s" % model)
    with open(model, 'rb') as file:
        G, D, Gs = pickle.load(file)
    Gs.print_layers()
    generator = Generator(Gs, batch_size=1, randomize_noise=False)
    perceptual_model = PerceptualModel(512, layer=9, batch_size=1)
    perceptual_model.build_perceptual_model(generator.generated_image)
    return generator
def main():
    tflib.init_tf()
    Gs_network = load_Gs(Model)
    generator = Generator(Gs_network, batch_size=1, randomize_noise=False)
    os.makedirs(config.dlatents_dir, exist_ok=True)
    target = np.load(os.path.join(config.dlatents_dir, '1_01.npy'))

    #move_and_show(generator, target, "age", [-20, -16, -12, -8, 0, 8, 12, 16, 20])
    #move_and_show(generator, target, "race_black", [-40, -32, -24, -16, 0, 16, 24, 32, 40])
    #move_and_show(generator, target, "gender", [-20, -16, -12, -8, 0, 8, 12, 16, 20])
    move_and_show(generator, target, "eyes_open",
                  [-12, -11, -10, -9, -8, -7, -6, -5, -4, -2, -1, 0,
                   1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 19, 20, 21, 22, 23, 24])
def setup(opts):
    tflib.init_tf()
    model = opts['checkpoint']
    print("open model %s" % model)
    with open(model, 'rb') as file:
        G, D, Gs = pickle.load(file)
    Gs.print_layers()

    # load latent representations
    p1 = inputs['people_vector']
    global latent_vector_1
    latent_vector_1 = np.load(p1)
    p2 = inputs['people_vector2']
    global latent_vector_2
    latent_vector_2 = np.load(p2)

    global generator
    generator = Generator(Gs, batch_size=1, randomize_noise=False)
    return generator
def load_snapshot(self):
    # Load pre-trained network.
    tflib.init_tf()
    self.rnd = np.random.RandomState()
    #url = os.path.abspath("marrow/00021-sgan-dense512-8gpu/network-final.pkl")
    #url = os.path.abspath("marrow/00021-sgan-dense512-8gpu/network-snapshot-009247.pkl")
    #url = os.path.abspath("marrow/00021-sgan-dense512-8gpu/network-snapshot-008044.pkl")
    #url = os.path.abspath("marrow/00021-sgan-dense512-8gpu/network-snapshot-024287.pkl")
    url = os.path.abspath("marrow/00021-sgan-dense512-8gpu/network-snapshot-013458.pkl")
    #url = os.path.abspath("marrow/00021-sgan-dense512-8gpu/network-snapshot-010450.pkl")
    #url = os.path.abspath("marrow/00021-sgan-dense512-8gpu/network-snapshot-015263.pkl")
    #url = os.path.abspath("marrow/00021-sgan-dense512-8gpu/network-snapshot-011653.pkl")
    with open(url, 'rb') as f:
        self._G, self._D, self.Gs = pickle.load(f)
    self.generator = Generator(self.Gs, batch_size=1, randomize_noise=False)
    print(self.Gs)
def onLoadFile():
    global generator, encoderGenerator, SIZE_LATENT_SPACE, OUTPUT_RESOLUTION, pointsSaved, modelPath
    # modelPath = filedialog.askopenfilename(initialdir=PATH_LOAD_FILE, title="Select file")
    modelPath = "/media/leandro/stuff/Data/ahegao/network-snapshot-011225.pkl"
    with open(modelPath, 'rb') as file:
        _, _, generator = pickle.load(file)
    SIZE_LATENT_SPACE = int(generator.list_layers()[0][1].shape[1])
    OUTPUT_RESOLUTION = int(generator.list_layers()[-1][1].shape[2])
    root.title('PGAN Generator - %s' % modelPath)
    # if canvas:
    #     canvas.delete("all")
    if pointList:
        pointList.delete(0, tk.END)
    pointsSaved = []
    encoderGenerator = Generator(generator, 1)
def generate_image(latent_vector):
    os.chdir('/home/bizon/CBIS-DDSM/other/fuse_face_flask/stylegan-encoder')
    print('dir changed')

    #################################
    # init generator
    # URL_FFHQ = 'https://drive.google.com/uc?id=1MEGjdvVpUsu1jB4zrXZN7Y4kBBOzizDQ'
    tflib.init_tf()
    with dnnlib.util.open_url(URL_FFHQ, cache_dir=config.cache_dir) as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)
    generator = Generator(Gs_network, batch_size=1, randomize_noise=False)
    #################################

    os.chdir('/home/bizon/CBIS-DDSM/other/fuse_face_flask')
    latent_vector = latent_vector.reshape((1, 18, 512))
    generator.set_dlatents(latent_vector)
    img_array = generator.generate_images()[0]
    img = PIL.Image.fromarray(img_array, 'RGB')
    return img
def choice(choice, npyfile, filename):
    tflib.init_tf()
    Gs_network = load_Gs(Model)
    global generator, flag, fname
    fname = filename
    flag = choice
    generator = Generator(Gs_network, batch_size=1, randomize_noise=False)
    os.makedirs(config.dlatents_dir, exist_ok=True)
    # person = np.load(os.path.join(config.dlatents_dir, 'Scarlett Johansson01_01.npy'))
    person = np.load(os.path.join(config.src_latents_dir, npyfile))  # (1, 18, 512)

    # Loading already learned latent directions
    direction_list = []
    direction_list.append(np.load('ffhq_dataset/latent_directions/age.npy'))
    direction_list.append(np.load('ffhq_dataset/latent_directions/angle_horizontal.npy'))
    direction_list.append(np.load('ffhq_dataset/latent_directions/gender.npy'))
    direction_list.append(np.load('ffhq_dataset/latent_directions/eyes_open.npy'))
    direction_list.append(np.load('ffhq_dataset/latent_directions/glasses.npy'))
    direction_list.append(np.load('ffhq_dataset/latent_directions/smile.npy'))
    direction_list.append(np.load('ffhq_dataset/latent_directions/race_white.npy'))
    direction_list.append(np.load('ffhq_dataset/latent_directions/race_yellow.npy'))
    direction_list.append(np.load('ffhq_dataset/latent_directions/race_black.npy'))

    coeffs_list = []
    coeffs_list.append([-20, -16, -12, -8, 0, 8, 12, 16, 20])
    coeffs_list.append([-40, -32, -24, -16, 0, 16, 24, 32, 40])
    coeffs_list.append([-40, -32, -24, -16, 0, 16, 24, 32, 40])
    coeffs_list.append([-8, -6, -4, -2, 0, 2, 4, 6, 8])
    coeffs_list.append([-16, -12, -8, -4, 0, 4, 8, 12, 16])
    coeffs_list.append([-16, -12, -8, -4, 0, 4, 8, 12, 16])
    coeffs_list.append([-10, -8, -6, -4, -2, 0, 2, 4, 6, 8, 10])
    coeffs_list.append([-10, -8, -6, -4, -2, 0, 2, 4, 6, 8, 10])
    coeffs_list.append([-10, -8, -6, -4, -2, 0, 2, 4, 6, 8, 10])

    move_and_show(person, direction_list[choice], coeffs_list[choice])
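# Usage sketch (assumption): the index passed as the first argument selects a
# direction by its position in direction_list above (0 age, 1 angle, 2 gender,
# 3 eyes_open, 4 glasses, 5 smile, 6-8 race). File names are hypothetical.
choice(5, 'person_01.npy', 'person_01')  # sweep the smile direction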
def __init__(self):
    root_dir = 'latent_representations/'
    listdir = []
    sort_listdir = []
    num = {}
    for i, f in enumerate(os.listdir(root_dir)):
        listdir.append(f)
        num[i] = int(f.split('_')[0])
    num = sorted(num.items(), key=operator.itemgetter(1))
    for i in range(len(num)):
        sort_listdir.append(listdir[num[i][0]])
    print(sort_listdir)
    self.latent_vectors = [np.load(root_dir + f) for f in sort_listdir]
    self.directions = {'smile': np.load('ffhq_dataset/latent_directions/smile.npy'),
                       'gender': np.load('ffhq_dataset/latent_directions/gender.npy'),
                       'age': np.load('ffhq_dataset/latent_directions/age.npy')}
    self.new_latent_vector = np.zeros((18, 512))
    tflib.init_tf()
    with open('karras2019stylegan-ffhq-1024x1024.pkl', 'rb') as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)
    self.generator = Generator(Gs_network, batch_size=1, randomize_noise=True)
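# Usage sketch (assumption): the class above is unnamed in this snippet, so
# Faces is a hypothetical name. Editing only the first 8 of the 18 style layers
# follows the convention used by the other snippets in this collection.
import PIL.Image

faces = Faces()
vec = faces.latent_vectors[0].copy()
vec[:8] = (faces.latent_vectors[0] + 1.5 * faces.directions['smile'])[:8]
faces.generator.set_dlatents(vec.reshape((1, 18, 512)))
smiling = PIL.Image.fromarray(faces.generator.generate_images()[0], 'RGB')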
os.makedirs(args.dlabel_dir, exist_ok=True)

# Initialize generator and perceptual model
# load network
network_pkl = misc.locate_network_pkl(args.results_dir)
print('Loading network from "%s"...' % network_pkl)
G, D, Gs = misc.load_network_pkl(args.results_dir, None)

# initiate random input
latents = misc.random_latents(1, Gs, random_state=np.random.RandomState(800))
labels = np.random.rand(1, args.labels_size)

generator = Generator(Gs, labels_size=572, batch_size=1,
                      clipping_threshold=args.clipping_threshold,
                      model_res=args.resolution)
perc_model = None
if (args.use_lpips_loss > 0.00000001):
    with open(args.load_perc_model, "rb") as f:
        perc_model = pickle.load(f)
ff_model = None
beautyrater_model = beautyrater.BeautyRater(args.load_vgg_beauty_rater_model)
facenet_model = facenet.FaceNet(args.load_facenet_model)
perceptual_model = PerceptualModel(args, perc_model=perc_model, batch_size=args.batch_size)
perceptual_model.build_perceptual_model(generator)
def main():
    parser = argparse.ArgumentParser(description='Find latent representation of reference images using perceptual losses',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('src_dir', help='Directory with images for encoding')
    parser.add_argument('generated_images_dir', help='Directory for storing generated images')
    parser.add_argument('dlatent_dir', help='Directory for storing dlatent representations')
    parser.add_argument('--data_dir', default='data', help='Directory for storing optional models')
    parser.add_argument('--mask_dir', default='masks', help='Directory for storing optional masks')
    parser.add_argument('--load_last', default='', help='Start with embeddings from directory')
    parser.add_argument('--dlatent_avg', default='', help='Use dlatent from file specified here for truncation instead of dlatent_avg from Gs')
    parser.add_argument('--model_url', default='https://drive.google.com/uc?id=1aPjeguDIRE0hs4_PHiRghK1Y2Qh3zOi1',
                        help='Fetch a StyleGAN model to train on from this URL')  # karras2019stylegan-ffhq-1024x1024.pkl
    parser.add_argument('--model_res', default=1024, help='The dimension of images in the StyleGAN model', type=int)
    parser.add_argument('--batch_size', default=1, help='Batch size for generator and perceptual model', type=int)
    parser.add_argument('--optimizer', default='ggt', help='Optimization algorithm used for optimizing dlatents')

    # Perceptual model params
    parser.add_argument('--image_size', default=256, help='Size of images for perceptual model', type=int)
    parser.add_argument('--resnet_image_size', default=256, help='Size of images for the Resnet model', type=int)
    parser.add_argument('--lr', default=0.25, help='Learning rate for perceptual model', type=float)
    parser.add_argument('--decay_rate', default=0.9, help='Decay rate for learning rate', type=float)
    parser.add_argument('--iterations', default=100, help='Number of optimization steps for each batch', type=int)
    parser.add_argument('--decay_steps', default=4, help='Decay steps for learning rate decay (as a percent of iterations)', type=float)
    parser.add_argument('--early_stopping', default=True, help='Stop early once training stabilizes', type=str2bool, nargs='?', const=True)
    parser.add_argument('--early_stopping_threshold', default=0.5, help='Stop after this threshold has been reached', type=float)
    parser.add_argument('--early_stopping_patience', default=10, help='Number of iterations to wait below threshold', type=int)
    parser.add_argument('--load_effnet', default='data/finetuned_effnet.h5', help='Model to load for EfficientNet approximation of dlatents')
    parser.add_argument('--load_resnet', default='data/finetuned_resnet.h5', help='Model to load for ResNet approximation of dlatents')
    parser.add_argument('--use_preprocess_input', default=True, help='Call process_input() first before using feed forward net', type=str2bool, nargs='?', const=True)
    parser.add_argument('--use_best_loss', default=True, help='Output the lowest loss value found as the solution', type=str2bool, nargs='?', const=True)
    parser.add_argument('--average_best_loss', default=0.25, help='Do a running weighted average with the previous best dlatents found', type=float)
    parser.add_argument('--sharpen_input', default=True, help='Sharpen the input images', type=str2bool, nargs='?', const=True)

    # Loss function options
    parser.add_argument('--use_vgg_loss', default=0.4, help='Use VGG perceptual loss; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument('--use_vgg_layer', default=9, help='Pick which VGG layer to use.', type=int)
    parser.add_argument('--use_pixel_loss', default=1.5, help='Use logcosh image pixel loss; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument('--use_mssim_loss', default=200, help='Use MS-SIM perceptual loss; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument('--use_lpips_loss', default=100, help='Use LPIPS perceptual loss; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument('--use_l1_penalty', default=0.5, help='Use L1 penalty on latents; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument('--use_discriminator_loss', default=0.5, help='Use trained discriminator to evaluate realism.', type=float)
    parser.add_argument('--use_adaptive_loss', default=False, help='Use the adaptive robust loss function from Google Research for pixel and VGG feature loss.', type=str2bool, nargs='?', const=True)

    # Generator params
    parser.add_argument('--randomize_noise', default=False, help='Add noise to dlatents during optimization', type=str2bool, nargs='?', const=True)
    parser.add_argument('--tile_dlatents', default=False, help='Tile dlatents to use a single vector at each scale', type=str2bool, nargs='?', const=True)
    parser.add_argument('--clipping_threshold', default=2.0, help='Stochastic clipping of gradient values outside of this threshold', type=float)

    # Masking params
    parser.add_argument('--load_mask', default=False, help='Load segmentation masks', type=str2bool, nargs='?', const=True)
    parser.add_argument('--face_mask', default=True, help='Generate a mask for predicting only the face area', type=str2bool, nargs='?', const=True)
    parser.add_argument('--use_grabcut', default=True, help='Use grabcut algorithm on the face mask to better segment the foreground', type=str2bool, nargs='?', const=True)
    parser.add_argument('--scale_mask', default=1.4, help='Look over a wider section of foreground for grabcut', type=float)
    parser.add_argument('--composite_mask', default=True, help='Merge the unmasked area back into the generated image', type=str2bool, nargs='?', const=True)
    parser.add_argument('--composite_blur', default=8, help='Size of blur filter to smoothly composite the images', type=int)

    # Video params
    parser.add_argument('--video_dir', default='videos', help='Directory for storing training videos')
    parser.add_argument('--output_video', default=False, help='Generate videos of the optimization process', type=bool)
    parser.add_argument('--video_codec', default='MJPG', help='FOURCC-supported video codec name')
    parser.add_argument('--video_frame_rate', default=24, help='Video frames per second', type=int)
    parser.add_argument('--video_size', default=512, help='Video size in pixels', type=int)
    parser.add_argument('--video_skip', default=1, help='Only write every n frames (1 = write every frame)', type=int)

    args, other_args = parser.parse_known_args()
    args.decay_steps *= 0.01 * args.iterations  # Calculate steps as a percent of total iterations

    if args.output_video:
        import cv2
        synthesis_kwargs = dict(output_transform=dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=False),
                                minibatch_size=args.batch_size)

    ref_images = [os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir)]
    ref_images = list(filter(os.path.isfile, ref_images))
    if len(ref_images) == 0:
        raise Exception('%s is empty' % args.src_dir)

    os.makedirs(args.data_dir, exist_ok=True)
    os.makedirs(args.mask_dir, exist_ok=True)
    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)
    os.makedirs(args.video_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    with dnnlib.util.open_url(args.model_url, cache_dir=config.cache_dir) as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)

    generator = Generator(Gs_network, args.batch_size, clipping_threshold=args.clipping_threshold,
                          tiled_dlatent=args.tile_dlatents, model_res=args.model_res,
                          randomize_noise=args.randomize_noise)
    if (args.dlatent_avg != ''):
        generator.set_dlatent_avg(np.load(args.dlatent_avg))

    perc_model = None
    if (args.use_lpips_loss > 0.00000001):
        with dnnlib.util.open_url('https://drive.google.com/uc?id=1N2-m9qszOeVC9Tq77WxsLnuWwOedQiD2', cache_dir=config.cache_dir) as f:
            perc_model = pickle.load(f)
    perceptual_model = PerceptualModel(args, perc_model=perc_model, batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator, discriminator_network)

    ff_model = None

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size), total=len(ref_images)//args.batch_size):
        names = [os.path.splitext(os.path.basename(x))[0] for x in images_batch]
        if args.output_video:
            video_out = {}
            for name in names:
                video_out[name] = cv2.VideoWriter(os.path.join(args.video_dir, f'{name}.avi'),
                                                  cv2.VideoWriter_fourcc(*args.video_codec),
                                                  args.video_frame_rate, (args.video_size, args.video_size))

        perceptual_model.set_reference_images(images_batch)
        dlatents = None
        if (args.load_last != ''):  # load previous dlatents for initialization
            for name in names:
                dl = np.expand_dims(np.load(os.path.join(args.load_last, f'{name}.npy')), axis=0)
                if (dlatents is None):
                    dlatents = dl
                else:
                    dlatents = np.vstack((dlatents, dl))
        else:
            if (ff_model is None):
                if os.path.exists(args.load_resnet):
                    from keras.applications.resnet50 import preprocess_input
                    print("Loading ResNet Model:")
                    ff_model = load_model(args.load_resnet)
            if (ff_model is None):
                if os.path.exists(args.load_effnet):
                    import efficientnet
                    from efficientnet import preprocess_input
                    print("Loading EfficientNet Model:")
                    ff_model = load_model(args.load_effnet)
            if (ff_model is not None):  # predict initial dlatents with ResNet model
                if (args.use_preprocess_input):
                    dlatents = ff_model.predict(preprocess_input(load_images(images_batch, image_size=args.resnet_image_size)))
                else:
                    dlatents = ff_model.predict(load_images(images_batch, image_size=args.resnet_image_size))
        if dlatents is not None:
            generator.set_dlatents(dlatents)

        op = perceptual_model.optimize(generator.dlatent_variable, iterations=args.iterations, use_optimizer=args.optimizer)
        pbar = tqdm(op, leave=False, total=args.iterations)
        vid_count = 0
        best_loss = None
        best_dlatent = None
        avg_loss_count = 0
        if args.early_stopping:
            avg_loss = prev_loss = None
        for loss_dict in pbar:
            if args.early_stopping:  # early stopping feature
                if prev_loss is not None:
                    if avg_loss is not None:
                        avg_loss = 0.5 * avg_loss + (prev_loss - loss_dict["loss"])
                        if avg_loss < args.early_stopping_threshold:  # count while under threshold; else reset
                            avg_loss_count += 1
                        else:
                            avg_loss_count = 0
                        if avg_loss_count > args.early_stopping_patience:  # stop once threshold is reached
                            print("")
                            break
                    else:
                        avg_loss = prev_loss - loss_dict["loss"]
            pbar.set_description(" ".join(names) + ": " + "; ".join(["{} {:.4f}".format(k, v) for k, v in loss_dict.items()]))
            if best_loss is None or loss_dict["loss"] < best_loss:
                if best_dlatent is None or args.average_best_loss <= 0.00000001:
                    best_dlatent = generator.get_dlatents()
                else:
                    best_dlatent = 0.25 * best_dlatent + 0.75 * generator.get_dlatents()
                if args.use_best_loss:
                    generator.set_dlatents(best_dlatent)
                best_loss = loss_dict["loss"]
            if args.output_video and (vid_count % args.video_skip == 0):
                batch_frames = generator.generate_images()
                for i, name in enumerate(names):
                    video_frame = PIL.Image.fromarray(batch_frames[i], 'RGB').resize((args.video_size, args.video_size), PIL.Image.LANCZOS)
                    video_out[name].write(cv2.cvtColor(np.array(video_frame).astype('uint8'), cv2.COLOR_RGB2BGR))
            generator.stochastic_clip_dlatents()
            prev_loss = loss_dict["loss"]
        if not args.use_best_loss:
            best_loss = prev_loss
        print(" ".join(names), " Loss {:.4f}".format(best_loss))

        if args.output_video:
            for name in names:
                video_out[name].release()

        # Generate images from found dlatents and save them
        if args.use_best_loss:
            generator.set_dlatents(best_dlatent)
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_path, img_name in zip(generated_images, generated_dlatents, images_batch, names):
            mask_img = None
            if args.composite_mask and (args.load_mask or args.face_mask):
                _, im_name = os.path.split(img_path)
                mask_img = os.path.join(args.mask_dir, f'{im_name}')
            if args.composite_mask and mask_img is not None and os.path.isfile(mask_img):
                orig_img = PIL.Image.open(img_path).convert('RGB')
                width, height = orig_img.size
                imask = PIL.Image.open(mask_img).convert('L').resize((width, height))
                imask = imask.filter(ImageFilter.GaussianBlur(args.composite_blur))
                mask = np.array(imask)/255
                mask = np.expand_dims(mask, axis=-1)
                img_array = mask*np.array(img_array) + (1.0-mask)*np.array(orig_img)
                img_array = img_array.astype(np.uint8)
                #img_array = np.where(mask, np.array(img_array), orig_img)
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(os.path.join(args.generated_images_dir, f'{img_name}.png'), 'PNG')
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)
        generator.reset_dlatents()
import pickle

import PIL.Image
import numpy as np
import dnnlib
import dnnlib.tflib as tflib
import config
from encoder.generator_model import Generator
import matplotlib.pyplot as plt

URL_FFHQ = 'https://drive.google.com/uc?id=1MEGjdvVpUsu1jB4zrXZN7Y4kBBOzizDQ'

tflib.init_tf()
with dnnlib.util.open_url(URL_FFHQ, cache_dir=config.cache_dir) as f:
    generator_network, discriminator_network, Gs_network = pickle.load(f)
generator = Generator(Gs_network, batch_size=1, randomize_noise=False)


def generate_image(latent_vector):
    latent_vector = latent_vector.reshape((1, 18, 512))
    generator.set_dlatents(latent_vector)
    img_array = generator.generate_images()[0]
    img = PIL.Image.fromarray(img_array, 'RGB')
    return img.resize((256, 256))


def move_and_show(latent_vector, direction, coeffs):
    fig, ax = plt.subplots(1, len(coeffs), figsize=(15, 10), dpi=80)
    for i, coeff in enumerate(coeffs):
        new_latent_vector = latent_vector.copy()
        new_latent_vector[:8] = (latent_vector + coeff * direction)[:8]
        ax[i].imshow(generate_image(new_latent_vector))
        ax[i].set_title('Coeff: %0.1f' % coeff)
    [x.axis('off') for x in ax]
    plt.show()
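# Usage sketch (assumption): sweep a stored dlatent along the smile direction
# with the module-level generator defined above. Both .npy paths are
# hypothetical but follow the layout used by the other snippets here.
person = np.load('latent_representations/person_01.npy')
smile_direction = np.load('ffhq_dataset/latent_directions/smile.npy')
move_and_show(person, smile_direction, [-1, 0, 1])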
def main():
    parser = argparse.ArgumentParser(description='Find latent representation of reference images using perceptual loss')
    parser.add_argument('src_dir', help='Directory with images for encoding')
    parser.add_argument('generated_images_dir', help='Directory for storing generated images')
    parser.add_argument('dlatent_dir', help='Directory for storing dlatent representations')
    parser.add_argument('--network_pkl', default='gdrive:networks/stylegan2-ffhq-config-f.pkl',
                        help='Path to local copy of stylegan2-ffhq-config-f.pkl')

    # for now it's unclear if larger batch leads to better performance/quality
    parser.add_argument('--batch_size', default=1, help='Batch size for generator and perceptual model', type=int)

    # Perceptual model params
    parser.add_argument('--image_size', default=256, help='Size of images for perceptual model', type=int)
    parser.add_argument('--lr', default=1., help='Learning rate for perceptual model', type=float)
    parser.add_argument('--iterations', default=1000, help='Number of optimization steps for each batch', type=int)
    parser.add_argument('--layer', default=9, help='Final layer for perceptual model', type=int)
    parser.add_argument('--model', default="vgg16", help='Model for perceptual model')

    # Generator params
    parser.add_argument('--randomize_noise', default=False, help='Add noise to dlatents during optimization', type=bool)
    args, other_args = parser.parse_known_args()

    ref_images = [os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir)]
    ref_images = list(filter(os.path.isfile, ref_images))
    if len(ref_images) == 0:
        raise Exception('%s is empty' % args.src_dir)

    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    generator_network, discriminator_network, Gs_network = pretrained_networks.load_networks(args.network_pkl)

    generator = Generator(Gs_network, args.batch_size, randomize_noise=args.randomize_noise)
    perceptual_model = PerceptualModel(args.image_size, layer=args.layer, batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator.generated_image, args.model)

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size), total=len(ref_images) // args.batch_size):
        names = [os.path.splitext(os.path.basename(x))[0] for x in images_batch]

        perceptual_model.set_reference_images(images_batch)
        op = perceptual_model.optimize(generator.dlatent_variable, iterations=args.iterations, learning_rate=args.lr)
        pbar = tqdm(op, leave=False, total=args.iterations)
        for loss in pbar:
            pbar.set_description(' '.join(names) + ' Loss: %.2f' % loss)
        print(' '.join(names), ' loss:', loss)

        # Generate images from found dlatents and save them
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_name in zip(generated_images, generated_dlatents, names):
            img = PIL.Image.fromarray(img_array, 'RGB')
            # os.path.join avoids silently broken paths when the output dirs lack a trailing slash
            png_directory = os.path.join(args.generated_images_dir, f'{img_name}/{args.model}/lr{args.lr}')
            npy_directory = os.path.join(args.dlatent_dir, f'{img_name}/{args.model}/lr{args.lr}')
            os.makedirs(png_directory, exist_ok=True)
            os.makedirs(npy_directory, exist_ok=True)
            img.save(os.path.join(png_directory, f'layer{args.layer}.png'), 'PNG')
            np.save(os.path.join(npy_directory, f'layer{args.layer}.npy'), dlatent)

        generator.reset_dlatents()
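# Usage sketch (assumption): this variant is run as a script; the file name and
# directory layout below are hypothetical.
#   python encode_images_stylegan2.py aligned_images/ generated_images/ latent_representations/ \
#       --model vgg16 --layer 9 --lr 1.0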
def main():
    src_dir = os.path.join("output", "aligned_images")
    generated_images_dir = os.path.join("output", "generated_images")
    generated_videos_dir = os.path.join("output", "generated_videos")
    dlatent_dir = os.path.join("output", "latent_representations")

    # for now it's unclear if larger batch leads to better performance/quality
    # Also, I may have broken >1 batch sizes, but happily they didn't seem to provide meaningful time savings anyway.
    batch_size = 1

    # Perceptual model params
    image_size = 1024
    iterations = 600

    # Generator params
    randomize_noise = False

    ref_images = [os.path.join(src_dir, x) for x in os.listdir(src_dir)]
    ref_images = list(sorted(filter(os.path.isfile, ref_images)))
    if len(ref_images) == 0:
        raise Exception('%s is empty' % src_dir)

    os.makedirs(generated_images_dir, exist_ok=True)
    os.makedirs(dlatent_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    # I saved the FFHQ network as a pickle file to my hard drive to avoid relying on the Nvidia Google Drive share.
    local_network_path = "karras2019stylegan-ffhq-1024x1024.pkl"
    if os.path.exists(local_network_path):
        with open(local_network_path, "rb") as f:
            generator_network, discriminator_network, Gs_network = pickle.load(f)
    else:
        URL_FFHQ = 'https://drive.google.com/uc?id=1MEGjdvVpUsu1jB4zrXZN7Y4kBBOzizDQ'  # karras2019stylegan-ffhq-1024x1024.pkl
        with dnnlib.util.open_url(URL_FFHQ, cache_dir=config.cache_dir) as f:
            generator_network, discriminator_network, Gs_network = pickle.load(f)

    # Set tiled_dlatent=False if you want to generate an 18x512 dlatent like in Puzer's original repo.
    # Set tiled_dlatent=True if you want to generate a 1x512 dlatent (subsequently tiled back to 18x512)
    # like the mapping network outputs.
    generator = Generator(Gs_network, batch_size, randomize_noise=randomize_noise, tiled_dlatent=True)
    perceptual_model = PerceptualDiscriminatorModel(image_size, batch_size=batch_size)
    perceptual_model.build_perceptual_model(discriminator_network, generator.generator_output,
                                            generator.generated_image, generator.dlatent_variable)

    # Optimize (only) dlatents by minimizing perceptual loss
    # between reference and generated images in feature space
    images = []
    video_frames = 100  # Set to >0 to save a video of the training, or to 0 to disable.
    if video_frames > 0:
        steps_per_frame = iterations / video_frames
        steps_until_frame = 0
    for images_batch in split_to_batches(ref_images, batch_size):
        names = [os.path.splitext(os.path.basename(x))[0] for x in images_batch]

        perceptual_model.set_reference_images(images_batch)
        op = perceptual_model.optimize(iterations=iterations)
        pbar = tqdm(op, leave=False, total=iterations)
        best_loss = None
        best_dlatent = None
        dlatent_frames = []
        for loss_dict in pbar:
            pbar.set_description(" ".join(names) + ": " + "; ".join(["{} {:.4f}".format(k, v) for k, v in loss_dict.items()]))
            if best_loss is None or loss_dict["loss"] < best_loss:
                best_loss = loss_dict["loss"]
                best_dlatent = generator.get_dlatents()
            if video_frames > 0:
                # If we're recording a video, consider taking a dlatent snapshot for later assembly.
                if steps_until_frame <= 0:
                    dlatent_frames.append(generator.get_dlatents()[0])
                    steps_until_frame += steps_per_frame
                steps_until_frame -= 1.
            generator.stochastic_clip_dlatents()
        print(" ".join(names), " Loss {:.4f}".format(best_loss))

        # Generate images from found dlatents and save them.
        generated_images = generator.generate_images(dlatents=best_dlatent)
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_name in zip(generated_images, generated_dlatents, names):
            img = PIL.Image.fromarray(img_array, 'RGB')
            images.append(PIL.Image.open(os.path.join(src_dir, "{}.png".format(img_name))))
            images.append(img)
            img.save(os.path.join(generated_images_dir, '{}.png'.format(img_name)), 'PNG')
            np.save(os.path.join(dlatent_dir, '{}.npy'.format(img_name)), dlatent)
        generator.reset_dlatents()

        bw_utils.save_images_to_grid(os.path.join(generated_images_dir, "grid.png"), images,
                                     len(images), 2, (1024, 1024), with_numbers=False)

        # Save video of training
        if video_frames > 0:
            os.makedirs(generated_videos_dir, exist_ok=True)
            video_name = os.path.join(generated_videos_dir, " ".join(names))
            # np.save(video_name + ".npy", np.array(dlatent_frames))
            # print("Saved dlatent video frames as {}.".format(video_name + ".npy"))
            image_generator = bw_utils.dlatents_image_generator_fn(dlatent_frames, Gs_network)
            bw_utils.save_video(image_generator, video_name)
def encode(a, b, c):
    args1 = {
        'src_dir': a,
        'generated_images_dir': b,
        'dlatent_dir': c,
        'batch_size': len(os.listdir(a)),
        'average_best_loss': 0.25,
        'clipping_threshold': 2.0,
        'composite_blur': 8,
        'composite_mask': True,
        'data_dir': 'data',
        'decay_rate': 0.9,
        'decay_steps': 24.0,
        'dlatent_avg': '',
        'early_stopping': True,
        'early_stopping_patience': 10,
        'early_stopping_threshold': 0.5,
        'face_mask': False,
        'image_size': 256,
        'iterations': 600,
        'load_effnet': 'data/finetuned_effnet.h5',
        'load_last': '',
        'load_mask': False,
        'load_resnet': 'data/finetuned_resnet.h5',
        'lr': 0.35,
        'mask_dir': 'masks',
        'model_res': 1024,
        'model_url': 'gdrive:networks/stylegan2-ffhq-config-f.pkl',
        'optimizer': 'ggt',
        'output_video': False,
        'randomize_noise': False,
        'resnet_image_size': 256,
        'scale_mask': 1.4,
        'sharpen_input': True,
        'tile_dlatents': False,
        'use_adaptive_loss': False,
        'use_best_loss': True,
        'use_discriminator_loss': 0.5,
        'use_grabcut': True,
        'use_l1_penalty': 0.5,
        'use_lpips_loss': 100,
        'use_mssim_loss': 200,
        'use_pixel_loss': 1.5,
        'use_preprocess_input': True,
        'use_vgg_layer': 9,
        'use_vgg_loss': 0.4,
        'vgg_url': 'https://rolux.org/media/stylegan/vgg16_zhang_perceptual.pkl',
        'video_codec': 'MJPG',
        'video_dir': 'videos',
        'video_frame_rate': 24,
        'video_size': 512,
        'video_skip': 1
    }

    class Struct:
        def __init__(self, **entries):
            self.__dict__.update(entries)

    args = Struct(**args1)
    args.decay_steps *= 0.01 * args.iterations  # Calculate steps as a percent of total iterations

    if args.output_video:
        import cv2
        synthesis_kwargs = dict(output_transform=dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=False),
                                minibatch_size=args.batch_size)

    ref_images = [os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir) if x[0] not in '._']
    ref_images = list(filter(os.path.isfile, ref_images))

    os.makedirs(args.data_dir, exist_ok=True)
    os.makedirs(args.mask_dir, exist_ok=True)
    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)
    os.makedirs(args.video_dir, exist_ok=True)

    # Initialize generator and perceptual model
    # tflib.init_tf()
    generator_network, discriminator_network, Gs_network = pretrained_networks.load_networks(args.model_url)
    generator = Generator(Gs_network, args.batch_size, randomize_noise=args.randomize_noise)
    if len(ref_images) == 0:
        return generator
    if (args.dlatent_avg != ''):
        generator.set_dlatent_avg(np.load(args.dlatent_avg))

    perc_model = None
    if (args.use_lpips_loss > 0.00000001):
        with dnnlib.util.open_url(args.vgg_url, cache_dir='.stylegan2-cache') as f:
            perc_model = pickle.load(f)
    perceptual_model = PerceptualModel(args, perc_model=perc_model, batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator, discriminator_network)

    ff_model = None

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size), total=len(ref_images) // args.batch_size):
        names = [os.path.splitext(os.path.basename(x))[0] for x in images_batch]
        if args.output_video:
            video_out = {}
            for name in names:
                video_out[name] = cv2.VideoWriter(os.path.join(args.video_dir, f'{name}.avi'),
                                                  cv2.VideoWriter_fourcc(*args.video_codec),
                                                  args.video_frame_rate, (args.video_size, args.video_size))

        perceptual_model.set_reference_images(images_batch)
        dlatents = None
        if (args.load_last != ''):  # load previous dlatents for initialization
            for name in names:
                dl = np.expand_dims(np.load(os.path.join(args.load_last, f'{name}.npy')), axis=0)
                if (dlatents is None):
                    dlatents = dl
                else:
                    dlatents = np.vstack((dlatents, dl))
        else:
            if (ff_model is None):
                if os.path.exists(args.load_resnet):
                    from keras.applications.resnet50 import preprocess_input
                    print("Loading ResNet Model:")
                    ff_model = load_model(args.load_resnet)
            if (ff_model is None):
                if os.path.exists(args.load_effnet):
                    import efficientnet
                    from efficientnet import preprocess_input
                    print("Loading EfficientNet Model:")
                    ff_model = load_model(args.load_effnet)
            if (ff_model is not None):  # predict initial dlatents with ResNet model
                if (args.use_preprocess_input):
                    dlatents = ff_model.predict(preprocess_input(load_images(images_batch, image_size=args.resnet_image_size)))
                else:
                    dlatents = ff_model.predict(load_images(images_batch, image_size=args.resnet_image_size))
        if dlatents is not None:
            generator.set_dlatents(dlatents)

        op = perceptual_model.optimize(generator.dlatent_variable, iterations=args.iterations, use_optimizer=args.optimizer)
        pbar = tqdm(op, leave=False, total=args.iterations)
        vid_count = 0
        best_loss = None
        best_dlatent = None
        avg_loss_count = 0
        if args.early_stopping:
            avg_loss = prev_loss = None
        for loss_dict in pbar:
            if args.early_stopping:  # early stopping feature
                if prev_loss is not None:
                    if avg_loss is not None:
                        avg_loss = 0.5 * avg_loss + (prev_loss - loss_dict["loss"])
                        if avg_loss < args.early_stopping_threshold:  # count while under threshold; else reset
                            avg_loss_count += 1
                        else:
                            avg_loss_count = 0
                        if avg_loss_count > args.early_stopping_patience:  # stop once threshold is reached
                            print("")
                            break
                    else:
                        avg_loss = prev_loss - loss_dict["loss"]
            pbar.set_description(" ".join(names) + ": " + "; ".join(["{} {:.4f}".format(k, v) for k, v in loss_dict.items()]))
            if best_loss is None or loss_dict["loss"] < best_loss:
                if best_dlatent is None or args.average_best_loss <= 0.00000001:
                    best_dlatent = generator.get_dlatents()
                else:
                    best_dlatent = 0.25 * best_dlatent + 0.75 * generator.get_dlatents()
                if args.use_best_loss:
                    generator.set_dlatents(best_dlatent)
                best_loss = loss_dict["loss"]
            if args.output_video and (vid_count % args.video_skip == 0):
                batch_frames = generator.generate_images()
                for i, name in enumerate(names):
                    video_frame = PIL.Image.fromarray(batch_frames[i], 'RGB').resize((args.video_size, args.video_size), PIL.Image.LANCZOS)
                    video_out[name].write(cv2.cvtColor(np.array(video_frame).astype('uint8'), cv2.COLOR_RGB2BGR))
            generator.stochastic_clip_dlatents()
            prev_loss = loss_dict["loss"]
        if not args.use_best_loss:
            best_loss = prev_loss
        print(" ".join(names), " Loss {:.4f}".format(best_loss))

        if args.output_video:
            for name in names:
                video_out[name].release()

        # Generate images from found dlatents and save them
        if args.use_best_loss:
            generator.set_dlatents(best_dlatent)
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_path, img_name in zip(generated_images, generated_dlatents, images_batch, names):
            mask_img = None
            if args.composite_mask and (args.load_mask or args.face_mask):
                _, im_name = os.path.split(img_path)
                mask_img = os.path.join(args.mask_dir, f'{im_name}')
            if args.composite_mask and mask_img is not None and os.path.isfile(mask_img):
                orig_img = PIL.Image.open(img_path).convert('RGB')
                width, height = orig_img.size
                imask = PIL.Image.open(mask_img).convert('L').resize((width, height))
                imask = imask.filter(ImageFilter.GaussianBlur(args.composite_blur))
                mask = np.array(imask) / 255
                mask = np.expand_dims(mask, axis=-1)
                img_array = mask * np.array(img_array) + (1.0 - mask) * np.array(orig_img)
                img_array = img_array.astype(np.uint8)
                #img_array = np.where(mask, np.array(img_array), orig_img)
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(os.path.join(args.generated_images_dir, f'{img_name}.png'), 'PNG')
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)
        generator.reset_dlatents()
    return generator
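# Usage sketch (assumption): encode() wires the three positional directories
# into the same pipeline as the argparse variants above and returns the
# generator; directory names below are hypothetical.
generator = encode('aligned_images', 'generated_images', 'latent_representations')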
def main():
    parser = argparse.ArgumentParser(description='Find latent representation of reference images using perceptual losses',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # Output directories
    parser.add_argument('src_dir', help='Directory with images for encoding')
    parser.add_argument('generated_images_dir', help='Directory for storing generated images')
    parser.add_argument('guessed_images_dir', help='Directory for storing initially guessed images')
    parser.add_argument('dlatent_dir', help='Directory for storing dlatent representations')
    # General params
    parser.add_argument('--model_res', default=1024, help='The dimension of images in the StyleGAN model', type=int)
    parser.add_argument('--batch_size', default=1, help='Batch size for generator and perceptual model', type=int)
    parser.add_argument('--use_resnet', default=True, help='Use pretrained ResNet for approximating dlatents', type=lambda x: (str(x).lower() == 'true'))
    # Perceptual model params
    parser.add_argument('--iterations', default=100, help='Number of optimization steps for each batch', type=int)
    parser.add_argument('--lr', default=0.02, help='Learning rate for perceptual model', type=float)
    parser.add_argument('--decay_rate', default=0.9, help='Decay rate for learning rate', type=float)
    parser.add_argument('--decay_steps', default=10, help='Decay steps for learning rate decay (as a percent of iterations)', type=float)
    parser.add_argument('--image_size', default=256, help='Size of images for perceptual model', type=int)
    parser.add_argument('--resnet_image_size', default=256, help='Size of images for the ResNet model', type=int)
    # Loss function options
    parser.add_argument('--use_vgg_loss', default=0.4, help='Use VGG perceptual loss; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument('--use_vgg_layer', default=9, help='Pick which VGG layer to use.', type=int)
    parser.add_argument('--use_pixel_loss', default=1.5, help='Use logcosh image pixel loss; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument('--use_mssim_loss', default=100, help='Use MS-SSIM perceptual loss; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument('--use_lpips_loss', default=100, help='Use LPIPS perceptual loss; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument('--use_l1_penalty', default=1, help='Use L1 penalty on latents; 0 to disable, > 0 to scale.', type=float)
    # Generator params
    parser.add_argument('--randomize_noise', default=False, help='Add noise to dlatents during optimization', type=lambda x: (str(x).lower() == 'true'))
    parser.add_argument('--tile_dlatents', default=False, help='Tile dlatents to use a single vector at each scale', type=lambda x: (str(x).lower() == 'true'))
    parser.add_argument('--clipping_threshold', default=2.0, help='Stochastic clipping of gradient values outside of this threshold', type=float)
    # Masking params
    parser.add_argument('--mask_dir', default='masks/latent_interpolation', help='Directory for storing optional masks')
    parser.add_argument('--face_mask', default=False, help='Generate a mask for predicting only the face area', type=lambda x: (str(x).lower() == 'true'))
    parser.add_argument('--use_grabcut', default=True, help='Use grabcut algorithm on the face mask to better segment the foreground', type=lambda x: (str(x).lower() == 'true'))
    parser.add_argument('--scale_mask', default=1.5, help='Look over a wider section of foreground for grabcut', type=float)
    args, other_args = parser.parse_known_args()
    args.decay_steps *= 0.01 * args.iterations  # Calculate steps as a percent of total iterations

    ref_images = [os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir)]
    ref_images = sorted(list(filter(os.path.isfile, ref_images)))
    if len(ref_images) == 0:
        raise Exception('%s is empty' % args.src_dir)

    # Create output directories
    os.makedirs('data', exist_ok=True)
    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.guessed_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)
    if args.face_mask:
        os.makedirs(args.mask_dir, exist_ok=True)

    # Initialize generator
    tflib.init_tf()
    with open_url(url_styleGAN, cache_dir='cache') as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)
    generator = Generator(model=Gs_network, batch_size=args.batch_size,
                          clipping_threshold=args.clipping_threshold,
                          tiled_dlatent=args.tile_dlatents, model_res=args.model_res,
                          randomize_noise=args.randomize_noise)

    # Initialize perceptual model
    perc_model = None
    if args.use_lpips_loss > 1e-7:
        with open_url(url_VGG_perceptual, cache_dir='cache') as f:
            perc_model = pickle.load(f)
    perceptual_model = PerceptualModel(args, perc_model=perc_model, batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator)

    # Initialize ResNet model used to predict initial dlatents
    resnet_model = None
    if args.use_resnet:
        print("\nLoading ResNet Model:")
        resnet_model_fn = 'data/finetuned_resnet.h5'
        gdown.download(url_resnet, resnet_model_fn, quiet=True)
        resnet_model = load_model(resnet_model_fn)

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size), total=len(ref_images) // args.batch_size):
        names = [os.path.splitext(os.path.basename(x))[0] for x in images_batch]
        perceptual_model.set_reference_images(images_batch)

        # Predict initial dlatents with ResNet model
        if resnet_model is not None:
            dlatents = resnet_model.predict(preprocess_input(load_images(images_batch, image_size=args.resnet_image_size)))
            generator.set_dlatents(dlatents)

        # Generate and save initially guessed images
        initial_dlatents = generator.get_dlatents()
        initial_images = generator.generate_images()
        for img_array, dlatent, img_name in zip(initial_images, initial_dlatents, names):
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(os.path.join(args.guessed_images_dir, f'{img_name}.png'), 'PNG')

        # Optimization process to find the best latent vectors
        op = perceptual_model.optimize(generator.dlatent_variable, iterations=args.iterations)
        progress_bar = tqdm(op, leave=False, total=args.iterations)
        best_loss = None
        best_dlatent = None
        for loss_dict in progress_bar:
            progress_bar.set_description(" ".join(names) + ": " + "; ".join(["{} {:.4f}".format(k, v) for k, v in loss_dict.items()]))
            if best_loss is None or loss_dict["loss"] < best_loss:
                best_loss = loss_dict["loss"]
                best_dlatent = generator.get_dlatents()
            generator.stochastic_clip_dlatents()
        print(" ".join(names), " Loss {:.4f}".format(best_loss))

        # Save found dlatents
        generator.set_dlatents(best_dlatent)
        generated_dlatents = generator.get_dlatents()
        for dlatent, img_name in zip(generated_dlatents, names):
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)
        generator.reset_dlatents()

    # Concatenate and save dlatent vectors
    list_dlatents = sorted(os.listdir(args.dlatent_dir))
    final_w_vectors = np.array([np.load(os.path.join(args.dlatent_dir, dlatent)) for dlatent in list_dlatents])
    np.save(os.path.join(args.dlatent_dir, 'output_vectors.npy'), final_w_vectors)

    # Perform face morphing by interpolating the latent space
    w1, w2 = create_morphing_lists(final_w_vectors)
    ref_images_1, ref_images_2 = create_morphing_lists(ref_images)
    for i in range(len(ref_images_1)):
        avg_w_vector = (0.5 * (w1[i] + w2[i])).reshape((-1, 18, 512))
        generator.set_dlatents(avg_w_vector)
        img_array = generator.generate_images()[0]
        img = PIL.Image.fromarray(img_array, 'RGB')
        img_name = os.path.splitext(os.path.basename(ref_images_1[i]))[0] + '_vs_' + os.path.splitext(os.path.basename(ref_images_2[i]))[0]
        img.save(os.path.join(args.generated_images_dir, f'{img_name}.png'), 'PNG')
        generator.reset_dlatents()
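# NOTE: create_morphing_lists() is called above but not defined in this file.
# The sketch below is a hypothetical reconstruction inferred from its call
# sites only: it must return two parallel sequences whose i-th elements form
# a morphing pair, and it is applied both to the (N, 18, 512) dlatent array
# and to the list of reference image paths. Pairing every distinct
# combination of inputs is one plausible choice; the real helper may pair
# items differently (e.g. only consecutive ones).
from itertools import combinations

def create_morphing_lists(items):
    # Return two parallel lists covering all distinct index pairs of `items`,
    # so that (first[i], second[i]) is the i-th pair to morph.
    pairs = list(combinations(range(len(items)), 2))
    first = [items[i] for i, _ in pairs]
    second = [items[j] for _, j in pairs]
    return first, second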
def main():
    parser = argparse.ArgumentParser(description='Find latent representation of reference images using perceptual losses',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('src_dir', help='Directory with images for encoding')
    parser.add_argument('generated_images_dir', help='Directory for storing generated images')
    parser.add_argument('dlatent_dir', help='Directory for storing dlatent representations')
    parser.add_argument('--data_dir', default='data', help='Directory for storing optional models')
    parser.add_argument('--model_url', default='https://drive.google.com/uc?id=1MEGjdvVpUsu1jB4zrXZN7Y4kBBOzizDQ',
                        help='Fetch a StyleGAN model to train on from this URL')  # karras2019stylegan-ffhq-1024x1024.pkl
    parser.add_argument('--model_res', default=1024, help='The dimension of images in the StyleGAN model', type=int)
    parser.add_argument('--batch_size', default=1, help='Batch size for generator and perceptual model', type=int)
    # Perceptual model params
    parser.add_argument('--image_size', default=256, help='Size of images for perceptual model', type=int)
    parser.add_argument('--resnet_image_size', default=256, help='Size of images for the ResNet model', type=int)
    parser.add_argument('--lr', default=0.02, help='Learning rate for perceptual model', type=float)
    parser.add_argument('--decay_rate', default=0.9, help='Decay rate for learning rate', type=float)
    parser.add_argument('--iterations', default=100, help='Number of optimization steps for each batch', type=int)
    parser.add_argument('--decay_steps', default=10, help='Decay steps for learning rate decay (as a percent of iterations)', type=float)
    parser.add_argument('--load_resnet', default='data/finetuned_resnet.h5', help='Model to load for ResNet approximation of dlatents')
    # Loss function options
    parser.add_argument('--use_vgg_loss', default=0.4, help='Use VGG perceptual loss; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument('--use_vgg_layer', default=9, help='Pick which VGG layer to use.', type=int)
    parser.add_argument('--use_pixel_loss', default=1.5, help='Use logcosh image pixel loss; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument('--use_mssim_loss', default=100, help='Use MS-SSIM perceptual loss; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument('--use_lpips_loss', default=100, help='Use LPIPS perceptual loss; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument('--use_l1_penalty', default=1, help='Use L1 penalty on latents; 0 to disable, > 0 to scale.', type=float)
    # Generator params (boolean flags parsed as strings so that e.g. "--randomize_noise False" works as expected)
    parser.add_argument('--randomize_noise', default=False, help='Add noise to dlatents during optimization', type=lambda x: (str(x).lower() == 'true'))
    parser.add_argument('--tile_dlatents', default=False, help='Tile dlatents to use a single vector at each scale', type=lambda x: (str(x).lower() == 'true'))
    parser.add_argument('--clipping_threshold', default=2.0, help='Stochastic clipping of gradient values outside of this threshold', type=float)
    # Video params
    parser.add_argument('--video_dir', default='videos', help='Directory for storing training videos')
    parser.add_argument('--output_video', default=False, help='Generate videos of the optimization process', type=lambda x: (str(x).lower() == 'true'))
    parser.add_argument('--video_codec', default='MJPG', help='FOURCC-supported video codec name')
    parser.add_argument('--video_frame_rate', default=24, help='Video frames per second', type=int)
    parser.add_argument('--video_size', default=512, help='Video size in pixels', type=int)
    parser.add_argument('--video_skip', default=1, help='Only write every n frames (1 = write every frame)', type=int)
    args, other_args = parser.parse_known_args()
    args.decay_steps *= 0.01 * args.iterations  # Calculate steps as a percent of total iterations

    if args.output_video:
        import cv2
        synthesis_kwargs = dict(output_transform=dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=False),
                                minibatch_size=args.batch_size)

    ref_images = [os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir)]
    ref_images = list(filter(os.path.isfile, ref_images))
    if len(ref_images) == 0:
        raise Exception('%s is empty' % args.src_dir)

    os.makedirs(args.data_dir, exist_ok=True)
    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)
    os.makedirs(args.video_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    with dnnlib.util.open_url(args.model_url, cache_dir=config.cache_dir) as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)
    generator = Generator(Gs_network, args.batch_size,
                          clipping_threshold=args.clipping_threshold,
                          tiled_dlatent=args.tile_dlatents, model_res=args.model_res,
                          randomize_noise=args.randomize_noise)
    perc_model = None
    if args.use_lpips_loss > 1e-8:
        with dnnlib.util.open_url('https://drive.google.com/uc?id=1N2-m9qszOeVC9Tq77WxsLnuWwOedQiD2',
                                  cache_dir=config.cache_dir) as f:
            perc_model = pickle.load(f)
    perceptual_model = PerceptualModel(args, perc_model=perc_model, batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator)

    resnet_model = None
    if os.path.exists(args.load_resnet):
        print("Loading ResNet Model:")
        resnet_model = load_model(args.load_resnet)

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size), total=len(ref_images) // args.batch_size):
        names = [os.path.splitext(os.path.basename(x))[0] for x in images_batch]
        if args.output_video:
            video_out = {}
            for name in names:
                video_out[name] = cv2.VideoWriter(os.path.join(args.video_dir, f'{name}.avi'),
                                                  cv2.VideoWriter_fourcc(*args.video_codec),
                                                  args.video_frame_rate, (args.video_size, args.video_size))
        perceptual_model.set_reference_images(images_batch)

        # Predict initial dlatents with the ResNet model, if available
        dlatents = None
        if resnet_model is not None:
            dlatents = resnet_model.predict(preprocess_resnet_input(load_images(images_batch, image_size=args.resnet_image_size)))
        if dlatents is not None:
            generator.set_dlatents(dlatents)

        op = perceptual_model.optimize(generator.dlatent_variable, iterations=args.iterations)
        pbar = tqdm(op, leave=False, total=args.iterations)
        vid_count = 0
        best_loss = None
        best_dlatent = None
        for loss_dict in pbar:
            pbar.set_description(" ".join(names) + ": " + "; ".join(["{} {:.4f}".format(k, v) for k, v in loss_dict.items()]))
            if best_loss is None or loss_dict["loss"] < best_loss:
                best_loss = loss_dict["loss"]
                best_dlatent = generator.get_dlatents()
            if args.output_video and (vid_count % args.video_skip == 0):
                batch_frames = generator.generate_images()
                for i, name in enumerate(names):
                    video_frame = PIL.Image.fromarray(batch_frames[i], 'RGB').resize((args.video_size, args.video_size), PIL.Image.LANCZOS)
                    video_out[name].write(cv2.cvtColor(np.array(video_frame).astype('uint8'), cv2.COLOR_RGB2BGR))
            vid_count += 1  # Frame counter; without the increment, --video_skip would have no effect
            generator.stochastic_clip_dlatents()
        print(" ".join(names), " Loss {:.4f}".format(best_loss))

        if args.output_video:
            for name in names:
                video_out[name].release()

        # Generate images from found dlatents and save them
        generator.set_dlatents(best_dlatent)
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_name in zip(generated_images, generated_dlatents, names):
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(os.path.join(args.generated_images_dir, f'{img_name}.png'), 'PNG')
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)
        generator.reset_dlatents()
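# NOTE: split_to_batches() drives both encoding loops above but is not defined
# in this section. A minimal sketch consistent with how it is used (fixed-size
# chunks; the last chunk may be smaller, which is why the tqdm totals above
# are only approximate) could look like this:
def split_to_batches(items, batch_size):
    # Yield consecutive slices of `items`, each of length <= batch_size.
    for i in range(0, len(items), batch_size):
        yield items[i:i + batch_size]

# Example invocation of the encoder (script and directory names are illustrative):
#   python encode_images.py aligned_images/ generated_images/ latent_representations/ --output_video true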