#desc += '-cond'; dataset.max_label_size = 'full' # conditioned on full label
#desc += '-cond1'; dataset.max_label_size = 1 # conditioned on first component of the label
#desc += '-g4k'; grid.size = '4k'
#desc += '-grpc'; grid.layout = 'row_per_class'

# Config presets (choose one).
desc += '-preset-v1-1gpu'
submit_config.num_gpus = 1
D.mbstd_group_size = 16
sched.minibatch_base = 16
sched.minibatch_dict = {256: 14, 512: 6, 1024: 3}
sched.lod_training_kimg = 800
sched.lod_transition_kimg = 800
train.total_kimg = 19000
#desc += '-preset-v2-1gpu'; submit_config.num_gpus = 1; sched.minibatch_base = 4; sched.minibatch_dict = {4: 128, 8: 128, 16: 128, 32: 64, 64: 32, 128: 16, 256: 8, 512: 4}; sched.G_lrate_dict = {1024: 0.0015}; sched.D_lrate_dict = EasyDict(sched.G_lrate_dict); train.total_kimg = 12000
#desc += '-preset-v2-2gpus'; submit_config.num_gpus = 2; sched.minibatch_base = 8; sched.minibatch_dict = {4: 256, 8: 256, 16: 128, 32: 64, 64: 32, 128: 16, 256: 8}; sched.G_lrate_dict = {512: 0.0015, 1024: 0.002}; sched.D_lrate_dict = EasyDict(sched.G_lrate_dict); train.total_kimg = 12000
#desc += '-preset-v2-4gpus'; submit_config.num_gpus = 4; sched.minibatch_base = 16; sched.minibatch_dict = {4: 512, 8: 256, 16: 128, 32: 64, 64: 32, 128: 16}; sched.G_lrate_dict = {256: 0.0015, 512: 0.002, 1024: 0.003}; sched.D_lrate_dict = EasyDict(sched.G_lrate_dict); train.total_kimg = 12000
#desc += '-preset-v2-8gpus'; submit_config.num_gpus = 8; sched.minibatch_base = 32; sched.minibatch_dict = {4: 512, 8: 256, 16: 128, 32: 64, 64: 32}; sched.G_lrate_dict = {128: 0.0015, 256: 0.002, 512: 0.003, 1024: 0.003}; sched.D_lrate_dict = EasyDict(sched.G_lrate_dict); train.total_kimg = 12000

# Numerical precision (choose one).
desc += '-fp32'
sched.max_minibatch_per_gpu = {256: 16, 512: 8, 1024: 4}
#desc += '-fp16'; G.dtype = 'float16'; D.dtype = 'float16'; G.pixelnorm_epsilon = 1e-4; G_opt.use_loss_scaling = True; D_opt.use_loss_scaling = True; sched.max_minibatch_per_gpu = {512: 16, 1024: 8}
    64: 32, 128: 16, 256: 8, 512: 4 }
#desc += '-2gpu'; submit_config.num_gpus = 2; sched.minibatch_base = 8; sched.minibatch_dict = {4: 256, 8: 256, 16: 128, 32: 64, 64: 32, 128: 16, 256: 8}
#desc += '-4gpu'; submit_config.num_gpus = 4; sched.minibatch_base = 16; sched.minibatch_dict = {4: 512, 8: 256, 16: 128, 32: 64, 64: 32, 128: 16}
#desc += '-8gpu'; submit_config.num_gpus = 8; sched.minibatch_base = 32; sched.minibatch_dict = {4: 512, 8: 256, 16: 128, 32: 64, 64: 32}

# Default options.
train_kimg = 120 # Should be enough in most cases.
train.total_kimg = train_kimg
train.resume_run_id = "models/2019-02-26-stylegan-faces-network-02048-016041.pkl"
train.network_snapshot_ticks = 1 # Save more frequently.
sched.lod_initial_resolution = 512 # Train from the highest resolution.
sched.lod_training_kimg = train_kimg
sched.lod_transition_kimg = train_kimg
sched.G_lrate_dict = {128: 0.0015, 256: 0.002, 512: 0.003, 1024: 0.003}
sched.D_lrate_dict = EasyDict(sched.G_lrate_dict)
sched.tick_kimg_dict = {
    4: 160, 8: 140, 16: 120, 32: 100, 64: 80, 128: 60, 256: 40,
    #512: 30,
    512: 2,   # 2k img / tick
    1024: 20}
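#----------------------------------------------------------------------------
# Reading note (sketch, not code from this repo): the resolution-keyed dicts
# above (sched.minibatch_dict, sched.G_lrate_dict, sched.tick_kimg_dict, ...)
# are presumably resolved by the training schedule as "use the per-resolution
# override if one exists, otherwise fall back to the corresponding *_base
# value". A minimal, self-contained illustration of that assumed lookup:

def _resolve_per_resolution(res_dict, resolution, base):
    """Return the per-resolution override if present, else the base value."""
    return res_dict.get(resolution, base)

_minibatch_base = 16
_minibatch_dict = {256: 14, 512: 6, 1024: 3}
assert _resolve_per_resolution(_minibatch_dict, 512, _minibatch_base) == 6   # override hit
assert _resolve_per_resolution(_minibatch_dict, 64, _minibatch_base) == 16   # falls back to base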
def run(dataset, train_dir, config, d_aug, diffaug_policy, cond, ops, jpg_data, mirror, mirror_v, \
        lod_step_kimg, batch_size, resume, resume_kimg, finetune, num_gpus, ema_kimg, gamma, freezeD):

    # training functions
    if d_aug: # https://github.com/mit-han-lab/data-efficient-gans
        train = EasyDict(run_func_name='training.training_loop_diffaug.training_loop') # Options for training loop (Diff Augment method)
        loss_args = EasyDict(func_name='training.loss_diffaug.ns_DiffAugment_r1', policy=diffaug_policy) # Options for loss (Diff Augment method)
    else: # original nvidia
        train = EasyDict(run_func_name='training.training_loop.training_loop') # Options for training loop (original from NVidia)
        G_loss = EasyDict(func_name='training.loss.G_logistic_ns_pathreg') # Options for generator loss.
        D_loss = EasyDict(func_name='training.loss.D_logistic_r1') # Options for discriminator loss.

    # network functions
    G = EasyDict(func_name='training.networks_stylegan2.G_main') # Options for generator network.
    D = EasyDict(func_name='training.networks_stylegan2.D_stylegan2') # Options for discriminator network.
    G_opt = EasyDict(beta1=0.0, beta2=0.99, epsilon=1e-8) # Options for generator optimizer.
    D_opt = EasyDict(beta1=0.0, beta2=0.99, epsilon=1e-8) # Options for discriminator optimizer.
    sched = EasyDict() # Options for TrainingSchedule.
    grid = EasyDict(size='1080p', layout='random') # Options for setup_snapshot_image_grid().
    sc = dnnlib.SubmitConfig() # Options for dnnlib.submit_run().
    tf_config = {'rnd.np_random_seed': 1000} # Options for tflib.init_tf().
    G.impl = D.impl = ops

    # dataset (tfrecords) - get or create
    tfr_files = file_list(os.path.dirname(dataset), 'tfr')
    tfr_files = [f for f in tfr_files if basename(f).split('-')[0] == basename(dataset)]
    if len(tfr_files) == 0 or os.stat(tfr_files[0]).st_size == 0:
        tfr_file, total_samples = create_from_images(dataset, jpg=jpg_data)
    else:
        tfr_file = tfr_files[0]
    dataset_args = EasyDict(tfrecord=tfr_file, jpg_data=jpg_data)

    # resolutions
    with tf.Graph().as_default(), tflib.create_session().as_default(): # pylint: disable=not-context-manager
        dataset_obj = _dataset.load_dataset(**dataset_args) # loading the data to see what comes out
        resolution = dataset_obj.resolution
        init_res = dataset_obj.init_res
        res_log2 = dataset_obj.res_log2
        dataset_obj.close()
        dataset_obj = None
    if list(init_res) == [4, 4]:
        desc = '%s-%d' % (basename(dataset), resolution)
    else:
        print(' custom init resolution', init_res)
        desc = basename(tfr_file)
    G.init_res = D.init_res = list(init_res)
    train.savenames = [desc.replace(basename(dataset), 'snapshot'), desc]
    desc += '-%s' % config

    # training schedule
    sched.lod_training_kimg = lod_step_kimg
    sched.lod_transition_kimg = lod_step_kimg
    sched.tick_kimg_base = 2 # if finetune else 0.2
    train.total_kimg = lod_step_kimg * res_log2 * 3 # 1.5 * ProGAN
    train.image_snapshot_ticks = 1
    train.network_snapshot_ticks = 5
    train.mirror_augment = mirror
    train.mirror_augment_v = mirror_v

    # learning rate
    if config == 'e':
        if finetune: # uptrain 1024
            sched.G_lrate_base = 0.001
            sched.lrate_step = 150 # period for stepping to next lrate, in kimg
        else: # train 1024
            sched.G_lrate_base = 0.001
            sched.G_lrate_dict = {0: 0.001, 1: 0.0007, 2: 0.0005, 3: 0.0003}
            sched.lrate_step = 1500 # period for stepping to next lrate, in kimg
    if config == 'f':
        sched.G_lrate_base = 0.001 # 0.0003 for few-shot datasets
    sched.D_lrate_base = sched.G_lrate_base # *2 - not used anyway

    sched.minibatch_gpu_base = batch_size
    sched.minibatch_size_base = num_gpus * sched.minibatch_gpu_base
    sc.num_gpus = num_gpus

    if config == 'e':
        G.fmap_base = D.fmap_base = 8 << 10
        if d_aug:
            loss_args.gamma = 100 if gamma is None else gamma
        else:
            D_loss.gamma = 100 if gamma is None else gamma
    elif config == 'f':
        G.fmap_base = D.fmap_base = 16 << 10
    else:
        print(' Only configs E and F are implemented')
        exit()

    if cond:
        desc += '-cond'
        dataset_args.max_label_size = 'full' # conditioned on full label

    if freezeD:
        D.freezeD = True
        train.resume_with_new_nets = True

    if d_aug:
        desc += '-daug'

    sc.submit_target = dnnlib.SubmitTarget.LOCAL
    sc.local.do_not_copy_source_files = True
    kwargs = EasyDict(train)
    kwargs.update(G_args=G, D_args=D, G_opt_args=G_opt, D_opt_args=D_opt)
    kwargs.update(dataset_args=dataset_args, sched_args=sched, grid_args=grid, tf_config=tf_config)
    kwargs.update(resume_pkl=resume, resume_kimg=resume_kimg, resume_with_new_nets=True)
    if ema_kimg is not None:
        kwargs.update(G_ema_kimg=ema_kimg)
    if d_aug:
        kwargs.update(loss_args=loss_args)
    else:
        kwargs.update(G_loss_args=G_loss, D_loss_args=D_loss)
    kwargs.submit_config = copy.deepcopy(sc)
    kwargs.submit_config.run_dir_root = train_dir
    kwargs.submit_config.run_desc = desc
    dnnlib.submit_run(**kwargs)
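#----------------------------------------------------------------------------
# Hypothetical usage sketch (not part of the repo): run() above is normally
# driven by a command-line wrapper, which is not shown in this excerpt. The
# call below only illustrates the argument list; every path and value here is
# an assumption, not a recommended setting.

def _example_run():
    run(dataset='data/mydataset',      # hypothetical folder of images (or matching *.tfr next to it)
        train_dir='train',             # root for run directories
        config='f',                    # only 'e' and 'f' are implemented
        d_aug=True,                    # use the DiffAugment training loop/loss
        diffaug_policy='color,translation,cutout',
        cond=False, ops='cuda', jpg_data=False,
        mirror=True, mirror_v=False,
        lod_step_kimg=300, batch_size=4,
        resume=None, resume_kimg=0, finetune=False,
        num_gpus=1, ema_kimg=10, gamma=None, freezeD=False)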
def run(dataset, train_dir, config, d_aug, diffaug_policy, cond, ops, jpg_data, mirror, mirror_v, \
        lod_step_kimg, batch_size, resume, resume_kimg, finetune, num_gpus, ema_kimg, gamma, freezeD):

    # dataset (tfrecords) - preprocess or get
    tfr_files = file_list(os.path.dirname(dataset), 'tfr')
    tfr_files = [f for f in tfr_files if basename(dataset) in f]
    if len(tfr_files) == 0:
        tfr_file, total_samples = create_from_images(dataset, jpg=jpg_data)
    else:
        tfr_file = tfr_files[0]
    dataset_args = EasyDict(tfrecord=tfr_file, jpg_data=jpg_data)
    desc = basename(tfr_file).split('-')[0]

    # training functions
    if d_aug: # https://github.com/mit-han-lab/data-efficient-gans
        train = EasyDict(run_func_name='training.training_loop_diffaug.training_loop') # Options for training loop (Diff Augment method)
        loss_args = EasyDict(func_name='training.loss_diffaug.ns_DiffAugment_r1', policy=diffaug_policy) # Options for loss (Diff Augment method)
    else: # original nvidia
        train = EasyDict(run_func_name='training.training_loop.training_loop') # Options for training loop (original from NVidia)
        G_loss = EasyDict(func_name='training.loss.G_logistic_ns_pathreg') # Options for generator loss.
        D_loss = EasyDict(func_name='training.loss.D_logistic_r1') # Options for discriminator loss.

    # network functions
    G = EasyDict(func_name='training.networks_stylegan2.G_main') # Options for generator network.
    D = EasyDict(func_name='training.networks_stylegan2.D_stylegan2') # Options for discriminator network.
    G_opt = EasyDict(beta1=0.0, beta2=0.99, epsilon=1e-8) # Options for generator optimizer.
    D_opt = EasyDict(beta1=0.0, beta2=0.99, epsilon=1e-8) # Options for discriminator optimizer.
    sched = EasyDict() # Options for TrainingSchedule.
    grid = EasyDict(size='1080p', layout='random') # Options for setup_snapshot_image_grid().
    sc = dnnlib.SubmitConfig() # Options for dnnlib.submit_run().
    tf_config = {'rnd.np_random_seed': 1000} # Options for tflib.init_tf().
    G.impl = D.impl = ops

    # resolutions
    data_res = basename(tfr_file).split('-')[-1].split('x') # get resolution from dataset filename
    data_res = list(reversed([int(x) for x in data_res])) # convert to int list
    init_res, resolution, res_log2 = calc_init_res(data_res)
    if init_res != [4, 4]:
        print(' custom init resolution', init_res)
    G.init_res = D.init_res = list(init_res)

    train.setname = desc + config
    desc = '%s-%d-%s' % (desc, resolution, config)

    # training schedule
    sched.lod_training_kimg = lod_step_kimg
    sched.lod_transition_kimg = lod_step_kimg
    train.total_kimg = lod_step_kimg * res_log2 * 2 # a la ProGAN
    if finetune:
        train.total_kimg = 15000 # should start from ~10k kimg
    train.image_snapshot_ticks = 1
    train.network_snapshot_ticks = 5
    train.mirror_augment = mirror
    train.mirror_augment_v = mirror_v

    # learning rate
    if config == 'e':
        if finetune: # uptrain 1024
            sched.G_lrate_base = 0.001
        else: # train 1024
            sched.G_lrate_base = 0.001
            sched.G_lrate_dict = {0: 0.001, 1: 0.0007, 2: 0.0005, 3: 0.0003}
            sched.lrate_step = 1500 # period for stepping to next lrate, in kimg
    if config == 'f':
        # sched.G_lrate_base = 0.0003
        sched.G_lrate_base = 0.001
    sched.D_lrate_base = sched.G_lrate_base # *2 - not used anyway

    sched.minibatch_gpu_base = batch_size
    sched.minibatch_size_base = num_gpus * sched.minibatch_gpu_base
    sc.num_gpus = num_gpus

    if config == 'e':
        G.fmap_base = D.fmap_base = 8 << 10
        if d_aug:
            loss_args.gamma = 100 if gamma is None else gamma
        else:
            D_loss.gamma = 100 if gamma is None else gamma
    elif config == 'f':
        G.fmap_base = D.fmap_base = 16 << 10
    else:
        print(' Only configs E and F are implemented')
        exit()

    if cond:
        desc += '-cond'
        dataset_args.max_label_size = 'full' # conditioned on full label

    if freezeD:
        D.freezeD = True
        train.resume_with_new_nets = True

    if d_aug:
        desc += '-daug'

    sc.submit_target = dnnlib.SubmitTarget.LOCAL
    sc.local.do_not_copy_source_files = True
    kwargs = EasyDict(train)
    kwargs.update(G_args=G, D_args=D, G_opt_args=G_opt, D_opt_args=D_opt)
    kwargs.update(dataset_args=dataset_args, sched_args=sched, grid_args=grid, tf_config=tf_config)
    kwargs.update(resume_pkl=resume, resume_kimg=resume_kimg, resume_with_new_nets=True)
    if ema_kimg is not None:
        kwargs.update(G_ema_kimg=ema_kimg)
    if d_aug:
        kwargs.update(loss_args=loss_args)
    else:
        kwargs.update(G_loss_args=G_loss, D_loss_args=D_loss)
    kwargs.submit_config = copy.deepcopy(sc)
    kwargs.submit_config.run_dir_root = train_dir
    kwargs.submit_config.run_desc = desc
    dnnlib.submit_run(**kwargs)
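#----------------------------------------------------------------------------
# calc_init_res() is defined elsewhere in the repo and is not shown in this
# excerpt. Judging only by its call site above, it maps the dataset resolution
# to (init_res, resolution, res_log2). The stand-in below is an assumption
# about that behavior, named differently to avoid confusion with the real
# helper.
import numpy as np

def calc_init_res_sketch(shape):
    """Hypothetical: round the larger side up to a power of two and derive the
    initial (lowest-LoD) block size such that init_res * 2**(res_log2 - 2)
    covers the data resolution."""
    res_log2 = int(np.ceil(np.log2(max(shape))))
    resolution = 2 ** res_log2
    init_res = [int(np.ceil(s / 2 ** (res_log2 - 2))) for s in shape]
    return init_res, resolution, res_log2

assert calc_init_res_sketch([1024, 1024]) == ([4, 4], 1024, 10)
assert calc_init_res_sketch([768, 1024]) == ([3, 4], 1024, 10)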
#desc += '-lsun-restaurant'; dataset = EasyDict(tfrecord_dir='lsun-restaurant-100k'); train.mirror_augment = True
#desc += '-lsun-sheep'; dataset = EasyDict(tfrecord_dir='lsun-sheep-100k'); train.mirror_augment = True
#desc += '-lsun-sofa'; dataset = EasyDict(tfrecord_dir='lsun-sofa-100k'); train.mirror_augment = True
#desc += '-lsun-tower'; dataset = EasyDict(tfrecord_dir='lsun-tower-100k'); train.mirror_augment = True
#desc += '-lsun-train'; dataset = EasyDict(tfrecord_dir='lsun-train-100k'); train.mirror_augment = True
#desc += '-lsun-tvmonitor'; dataset = EasyDict(tfrecord_dir='lsun-tvmonitor-100k'); train.mirror_augment = True
desc += '-test'; dataset = EasyDict(tfrecord_dir='test-2k', resolution=256); train.mirror_augment = True

# Conditioning & snapshot options.
#desc += '-cond'; dataset.max_label_size = 'full' # conditioned on full label
#desc += '-cond1'; dataset.max_label_size = 1 # conditioned on first component of the label
#desc += '-g4k'; grid.size = '4k'
#desc += '-grpc'; grid.layout = 'row_per_class'

# Config presets (choose one).
desc += '-preset-v1-1gpu'; submit_config.num_gpus = 1; D.mbstd_group_size = 16; sched.minibatch_base = 16; sched.minibatch_dict = {256: 14, 512: 6, 1024: 3}; sched.lod_training_kimg = 800; sched.lod_transition_kimg = 800; train.total_kimg = 19000
#desc += '-preset-v2-1gpu'; submit_config.num_gpus = 1; sched.minibatch_base = 4; sched.minibatch_dict = {4: 128, 8: 128, 16: 128, 32: 64, 64: 32, 128: 16, 256: 8, 512: 4}; sched.G_lrate_dict = {1024: 0.0015}; sched.D_lrate_dict = EasyDict(sched.G_lrate_dict); train.total_kimg = 12000
#desc += '-preset-v2-2gpus'; submit_config.num_gpus = 2; sched.minibatch_base = 8; sched.minibatch_dict = {4: 256, 8: 256, 16: 128, 32: 64, 64: 32, 128: 16, 256: 8}; sched.G_lrate_dict = {512: 0.0015, 1024: 0.002}; sched.D_lrate_dict = EasyDict(sched.G_lrate_dict); train.total_kimg = 12000
#desc += '-preset-v2-4gpus'; submit_config.num_gpus = 4; sched.minibatch_base = 16; sched.minibatch_dict = {4: 512, 8: 256, 16: 128, 32: 64, 64: 32, 128: 16}; sched.G_lrate_dict = {256: 0.0015, 512: 0.002, 1024: 0.003}; sched.D_lrate_dict = EasyDict(sched.G_lrate_dict); train.total_kimg = 12000
#desc += '-preset-v2-8gpus'; submit_config.num_gpus = 8; sched.minibatch_base = 32; sched.minibatch_dict = {4: 512, 8: 256, 16: 128, 32: 64, 64: 32}; sched.G_lrate_dict = {128: 0.0015, 256: 0.002, 512: 0.003, 1024: 0.003}; sched.D_lrate_dict = EasyDict(sched.G_lrate_dict); train.total_kimg = 12000

# Numerical precision (choose one).
desc += '-fp32'; sched.max_minibatch_per_gpu = {256: 16, 512: 8, 1024: 4}
#desc += '-fp16'; G.dtype = 'float16'; D.dtype = 'float16'; G.pixelnorm_epsilon = 1e-4; G_opt.use_loss_scaling = True; D_opt.use_loss_scaling = True; sched.max_minibatch_per_gpu = {512: 16, 1024: 8}

# Disable individual features.
#desc += '-nogrowing'; sched.lod_initial_resolution = 1024; sched.lod_training_kimg = 0; sched.lod_transition_kimg = 0; train.total_kimg = 10000
#desc += '-nopixelnorm'; G.use_pixelnorm = False
#desc += '-nowscale'; G.use_wscale = False; D.use_wscale = False
#desc += '-noleakyrelu'; G.use_leakyrelu = False
#desc += '-nosmoothing'; train.G_smoothing_kimg = 0.0
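#----------------------------------------------------------------------------
# Illustration (assumption): each active, uncommented option above appends a
# suffix to `desc`, which ultimately becomes the run description / run
# directory name via submit_config.run_desc. With the choices left active in
# this excerpt, the accumulated description would look roughly like this
# (the 'sgan' base name is hypothetical):
_desc = 'sgan'
_desc += '-test'              # dataset preset
_desc += '-preset-v1-1gpu'    # config preset
_desc += '-fp32'              # numerical precision
assert _desc == 'sgan-test-preset-v1-1gpu-fp32'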