def run(**args):
    """Build the full run configuration from the given options and submit it.

    The options are read into per-component configurations (training loop,
    schedule, visualization, snapshot grid, generator ``cG``, discriminator
    ``cD``, dataset), the experiment directory and the snapshot to resume from
    are resolved, and everything is handed to ``dnnlib.submit_run``.

    Args:
        **args: Option values keyed by option name. They are wrapped in an
            ``EasyDict`` and read both as attributes and by key.

    Returns:
        None. The function ends by calling ``dnnlib.submit_run``.

    Raises:
        SystemExit: If the latent size rounds down to zero, if several
            components are requested without a model that uses them, or if
            ``pretrained_pkl`` matches no file.
        AssertionError: If the number of listed GPUs is not 1, 2, 4 or 8.

    Side effects:
        Sets ``CUDA_VISIBLE_DEVICES`` when ``gpus`` is non-empty, mutates the
        option values in place (rounding, aliases) and prints status messages.
    """
    # NOTE(review): a second definition of `run` appears later in this file and
    # would replace this one once the module is loaded - confirm which is intended.
    args = EasyDict(args)

    train = EasyDict(run_func_name="training.training_loop.training_loop")  # training loop options
    sched = EasyDict()                                                      # TrainingSchedule options
    vis = EasyDict()                                                        # visualize.eval() options
    grid = EasyDict(size="1080p", layout="random")                          # setup_snapshot_img_grid() options
    sc = dnnlib.SubmitConfig()                                              # dnnlib.submit_run() options

    # Environment configuration
    tf_config = {
        "rnd.np_random_seed": 1000,
        "allow_soft_placement": True,
        "gpu_options.per_process_gpu_memory_fraction": 1.0,
    }
    # An empty --gpus string leaves the environment and the SubmitConfig GPU
    # count untouched: there is no GPU list to count in that case.
    if args.gpus != "":
        num_gpus = len(args.gpus.split(","))
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
        assert num_gpus in [1, 2, 4, 8]
        sc.num_gpus = num_gpus

    # Networks configuration
    cG = set_net("G", reg_interval=4)
    cD = set_net("D", reg_interval=16)

    # Dataset configuration: known datasets override the requested image ratio
    ratios = {
        "clevr": 0.75,
        "lsun-bedrooms": 0.72,
        "cityscapes": 0.5,
        "ffhq": 1.0,
    }
    args.ratio = ratios.get(args.dataset, args.ratio)
    dataset_args = EasyDict(
        tfrecord_dir=args.dataset,
        max_imgs=args.max_images,
        ratio=args.ratio,
        num_threads=args.num_threads,
    )
    for arg in ["data_dir", "mirror_augment", "total_kimg"]:
        cset(train, arg, args[arg])

    # Training and Optimizations configuration
    for arg in ["eval", "train", "recompile", "last_snapshots"]:
        cset(train, arg, args[arg])

    # Round down to the closest multiple of the minibatch size / component
    # number so that the sizes divide evenly
    args.batch_size -= args.batch_size % args.minibatch_size
    args.minibatch_std_size -= args.minibatch_std_size % args.minibatch_size
    args.latent_size -= args.latent_size % args.component_num
    if args.latent_size == 0:
        print(bcolored("Error: latent-size is too small. Must best a multiply of component-num.", "red"))
        # Abort with a failure status (a bare exit() would report success)
        raise SystemExit(1)

    # Schedule option name -> command-line option name
    sched_args = {
        "G_lrate": "g_lr",
        "D_lrate": "d_lr",
        "minibatch_size": "batch_size",
        "minibatch_gpu": "minibatch_size",
    }
    for arg, cmd_arg in sched_args.items():
        cset(sched, arg, args[cmd_arg])
    cset(train, "clip", args.clip)

    # Logging and metrics configuration
    metrics = [metric_defaults[x] for x in args.metrics]
    cset(cG.args, "truncation_psi", args.truncation_psi)
    for arg in ["summarize", "keep_samples"]:
        cset(train, arg, args[arg])

    # Visualization: alias the long option names to the short type names
    args.imgs = args.images
    args.ltnts = args.latents
    vis_types = ["imgs", "ltnts", "maps", "layer_maps", "interpolations", "noise_var", "style_mix"]
    # Set of all the visualization types that were switched on
    vis.vis_types = {arg for arg in vis_types if args[arg]}

    # Visualization option name -> command-line option name
    vis_args = {
        "grid": "vis_grid",
        "num": "vis_num",
        "rich_num": "vis_rich_num",
        "section_size": "vis_section_size",
        "intrp_density": "intrpolation_density",
        "intrp_per_component": "intrpolation_per_component",
        "alpha": "blending_alpha",
    }
    for arg, cmd_arg in vis_args.items():
        cset(vis, arg, args[cmd_arg])

    # Networks architecture
    cset(cG.args, "architecture", args.g_arch)
    cset(cD.args, "architecture", args.d_arch)
    cset([cG.args, cD.args], "resnet_mlp", args.resnet_mlp)
    cset(cG.args, "tanh", args.tanh)

    # Latent sizes: with several components the latent is split evenly among them
    if args.component_num > 1:
        # NOTE(review): this reads args.attention while the GANsformer section
        # below reads args.transformer - confirm both options exist.
        if not (args.attention or args.merge):
            print(bcolored("Error: component-num > 1 but the model is not using components.", "red"))
            print(bcolored("Either add --attention for GANsformer or --merge for k-GAN).", "red"))
            raise SystemExit(1)
        args.latent_size = int(args.latent_size / args.component_num)

    cD.args.latent_size = cG.args.latent_size = cG.args.dlatent_size = args.latent_size
    cset([cG.args, cD.args, train, vis], "component_num", args.component_num)

    # Mapping network: options are named mapping_<arg> on the command line
    for arg in ["layersnum", "lrmul", "dim", "shared"]:
        cset(cG.args, arg, args["mapping_{}".format(arg)])

    # StyleGAN settings
    for arg in ["style", "latent_stem", "fused_modconv", "local_noise"]:
        cset(cG.args, arg, args[arg])
    cD.args.mbstd_group_size = args.minibatch_std_size

    # GANsformer
    cset([cG.args, train], "attention", args.transformer)
    cset(cD.args, "attention", args.d_transformer)
    cset([cG.args, cD.args], "num_heads", args.num_heads)

    args.norm = args.normalize
    for arg in ["norm", "integration", "ltnt_gate", "img_gate", "kmeans",
                "kmeans_iters", "asgn_direct", "mapping_ltnt2ltnt"]:
        cset(cG.args, arg, args[arg])

    for arg in ["attention_inputs", "use_pos"]:
        cset([cG.args, cD.args], arg, args[arg])

    # Positional encoding
    for arg in ["dim", "init", "directions_num"]:
        field = "pos_{}".format(arg)
        cset([cG.args, cD.args], field, args[field])

    # k-GAN
    for arg in ["layer", "type", "channelwise"]:
        field = "merge_{}".format(arg)
        cset(cG.args, field, args[field])
    cset([cG.args, train], "merge", args.merge)

    # Attention: generator options are prefixed g_, discriminator options d_
    for arg in ["start_res", "end_res", "ltnt2ltnt", "img2img", "local_attention"]:
        cset(cG.args, arg, args["g_{}".format(arg)])
        cset(cD.args, arg, args["d_{}".format(arg)])
    cset(cG.args, "img2ltnt", args.g_img2ltnt)
    cset(cD.args, "ltnt2img", args.d_ltnt2img)

    # Mixing and dropout
    for arg in ["style_mixing", "component_mixing", "component_dropout", "attention_dropout"]:
        cset(cG.args, arg, args[arg])

    # Loss and regularization: loss option name -> command-line option name
    gloss_args = {
        "loss_type": "g_loss",
        "reg_weight": "g_reg_weight",
        "pathreg": "pathreg",
    }
    dloss_args = {
        "loss_type": "d_loss",
        "reg_type": "d_reg",
        "gamma": "gamma",
    }
    for arg, cmd_arg in gloss_args.items():
        cset(cG.loss_args, arg, args[cmd_arg])
    for arg, cmd_arg in dloss_args.items():
        cset(cD.loss_args, arg, args[cmd_arg])

    ##### Experiments management:
    # Whenever we start a new experiment we store its result in a directory named 'args.expname:000'.
    # When we rerun a training or evaluation command it restores the model from that directory by default.
    # If we wish to restart the model training, we can set --restart and then we will store data in a new
    # directory: 'args.expname:001' after the first restart, then 'args.expname:002' after the second, etc.

    # Find the latest run of this experiment. Only directories named exactly
    # '<expname>:<digits>' count, and ids are compared numerically; a brand-new
    # experiment (no matching directory) starts at run 0.
    run_ids = []
    for path in glob.glob("{}/{}:*".format(args.result_dir, args.expname)):
        suffix = path.split(":")[-1]
        expected = "{}:{}".format(os.path.basename(args.expname), suffix)
        if suffix.isdigit() and os.path.basename(path) == expected:
            run_ids.append(int(suffix))
    run_id = max(run_ids) if run_ids else 0
    # If restart, then work over a new directory
    if args.restart:
        run_id += 1

    run_name = "{}:{:03d}".format(args.expname, run_id)
    train.printname = "{} ".format(misc.bold(args.expname))

    snapshot, kimg, resume = None, 0, False
    pkls = sorted(glob.glob("{}/{}/network*.pkl".format(args.result_dir, run_name)))
    # Load a particular snapshot if specified
    if args.pretrained_pkl:
        matches = glob.glob(args.pretrained_pkl)
        if not matches:
            print(bcolored("Error: pretrained-pkl {} does not match any file.".format(args.pretrained_pkl), "red"))
            raise SystemExit(1)
        snapshot = matches[0]
        # Soft links support
        if os.path.islink(snapshot):
            snapshot = os.readlink(snapshot)
        # Extract the training step from the snapshot name when it ends with
        # '-<number>.<ext>'; otherwise keep kimg at 0
        try:
            kimg = int(snapshot.split("-")[-1].split(".")[0])
        except ValueError:
            pass

    # Otherwise, continue from the latest snapshot in the run directory
    elif pkls:
        snapshot = pkls[-1]
        kimg = int(snapshot.split("-")[-1].split(".")[0])
        resume = True

    if snapshot:
        print(misc.bcolored("Resuming {}, kimg {}".format(snapshot, kimg), "white"))
        train.resume_pkl = snapshot
        train.resume_kimg = kimg
    else:
        print(misc.bcolored("Start model training from scratch.", "white"))

    # Run environment configuration
    sc.run_dir_root = args.result_dir
    sc.run_desc = args.expname
    sc.run_id = run_id
    sc.run_name = run_name
    sc.submit_target = dnnlib.SubmitTarget.LOCAL
    sc.local.do_not_copy_source_files = True

    kwargs = EasyDict(train)
    kwargs.update(cG=cG, cD=cD)
    kwargs.update(
        dataset_args=dataset_args,
        vis_args=vis,
        sched_args=sched,
        grid_args=grid,
        metric_arg_list=metrics,
        tf_config=tf_config,
    )
    kwargs.submit_config = copy.deepcopy(sc)
    kwargs.resume = resume
    # If reload new options from the command line, no need to load the original configuration file
    kwargs.load_config = not args.reload

    dnnlib.submit_run(**kwargs)
def run(**args):
    """Build the full run configuration from the given options and submit it.

    The options are completed with defaults (flags given without a value,
    the GANsformer defaults and the baseline presets), read into
    per-component configurations (training loop, schedule, visualization,
    snapshot grid, generator ``cG``, discriminator ``cD``, dataset), the
    experiment directory and the snapshot to resume from are resolved, and
    everything is handed to ``dnnlib.submit_run``.

    Args:
        **args: Option values keyed by option name. They are wrapped in an
            ``EasyDict`` and read both as attributes and by key.

    Returns:
        None. The function ends by calling ``dnnlib.submit_run``.

    Raises:
        AssertionError: If the number of listed GPUs is not 1, 2, 4 or 8.

    Side effects:
        Sets ``CUDA_VISIBLE_DEVICES`` when ``gpus`` is non-empty, mutates the
        option values in place (defaults, rounding, aliases) and reports
        status and configuration errors through ``misc.log`` / ``misc.error``.
    """
    args = EasyDict(args)

    train = EasyDict(run_func_name="training.training_loop.training_loop")  # training loop options
    sched = EasyDict()                                                      # TrainingSchedule options
    vis = EasyDict()                                                        # visualize.eval() options
    grid = EasyDict(size="1080p", layout="random")                          # setup_snapshot_img_grid() options
    sc = dnnlib.SubmitConfig()                                              # dnnlib.submit_run() options

    # If the flag is specified without arguments (--arg), set to True
    for arg in ["summarize", "keep_samples", "style", "fused_modconv", "local_noise"]:
        if args[arg] is None:
            args[arg] = True

    if not args.train and not args.eval:
        misc.log(
            "Warning: Neither --train nor --eval are provided. Therefore, we only print network shapes",
            "red")

    # GANsformer defaults, chosen per dataset.
    # nset is defined elsewhere - presumably it only fills options the user
    # did not set explicitly; verify against its definition.
    if args.gansformer_default:
        task = args.dataset
        pretrained = "gdrive:{}-snapshot.pkl".format(task)
        if pretrained not in pretrained_networks.gdrive_urls:
            pretrained = None

        nset(args, "recompile", pretrained is not None)
        nset(args, "pretrained_pkl", pretrained)
        nset(args, "mirror_augment", task in ["cityscapes", "ffhq"])

        nset(args, "transformer", True)
        nset(args, "components_num", {"clevr": 8}.get(task, 16))
        nset(args, "latent_size", {"clevr": 128}.get(task, 512))

        nset(args, "normalize", "layer")
        nset(args, "integration", "mul")
        nset(args, "kmeans", True)
        nset(args, "use_pos", True)
        nset(args, "mapping_ltnt2ltnt", task != "clevr")
        nset(args, "style", task != "clevr")

        nset(args, "g_arch", "resnet")
        nset(args, "mapping_resnet", True)

        # The rest of this function names the dataset "cityscapes" (ratios,
        # mirror_augment), so that name must be listed here too; otherwise it
        # silently falls back to the default gamma. "cities" is kept as well.
        gammas = {"ffhq": 10, "cities": 20, "cityscapes": 20, "clevr": 40, "bedrooms": 100}
        nset(args, "gamma", gammas.get(task, 10))

    # Baseline presets
    if args.baseline == "GAN":
        nset(args, "style", False)
        nset(args, "latent_stem", True)

    if args.baseline == "SAGAN":
        nset(args, "style", False)
        nset(args, "latent_stem", True)
        nset(args, "g_img2img", 5)

    if args.baseline == "kGAN":
        nset(args, "kgan", True)
        nset(args, "merge_layer", 5)
        nset(args, "merge_type", "softmax")
        nset(args, "components_num", 8)

    # Environment configuration
    tf_config = {
        "rnd.np_random_seed": 1000,
        "allow_soft_placement": True,
        "gpu_options.per_process_gpu_memory_fraction": 1.0,
    }
    # An empty --gpus string leaves the environment and the SubmitConfig GPU
    # count untouched: there is no GPU list to count in that case.
    if args.gpus != "":
        num_gpus = len(args.gpus.split(","))
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
        assert num_gpus in [1, 2, 4, 8]
        sc.num_gpus = num_gpus

    # Networks configuration
    cG = set_net("G", reg_interval=4)
    cD = set_net("D", reg_interval=16)

    # Dataset configuration
    # For bedrooms, we choose the most common ratio in the
    # dataset and crop the other images into that ratio.
    ratios = {
        "clevr": 0.75,
        "bedrooms": 188.0 / 256,  # float literal: true division on any interpreter
        "cityscapes": 0.5,
        "ffhq": 1.0,
    }
    args.ratio = ratios.get(args.dataset, args.ratio)
    dataset_args = EasyDict(
        tfrecord_dir=args.dataset,
        max_imgs=args.train_images_num,
        num_threads=args.num_threads,
    )
    for arg in ["data_dir", "mirror_augment", "total_kimg", "ratio"]:
        cset(train, arg, args[arg])

    # Training and Optimizations configuration
    for arg in ["eval", "train", "recompile", "last_snapshots"]:
        cset(train, arg, args[arg])

    # Round down to the closest multiple of the minibatch size / components
    # number so that the sizes divide evenly
    args.batch_size -= args.batch_size % args.minibatch_size
    args.minibatch_std_size -= args.minibatch_std_size % args.minibatch_size
    args.latent_size -= args.latent_size % args.components_num
    if args.latent_size == 0:
        misc.error("--latent-size is too small. Must best a multiply of components-num")

    # Schedule option name -> command-line option name
    sched_args = {
        "G_lrate": "g_lr",
        "D_lrate": "d_lr",
        "minibatch_size": "batch_size",
        "minibatch_gpu": "minibatch_size",
    }
    for arg, cmd_arg in sched_args.items():
        cset(sched, arg, args[cmd_arg])
    cset(train, "clip", args.clip)

    # Logging and metrics configuration
    metrics = [metric_defaults[x] for x in args.metrics]
    cset(cG.args, "truncation_psi", args.truncation_psi)
    for arg in ["keep_samples", "num_heads"]:
        cset(vis, arg, args[arg])
    for arg in ["summarize", "eval_images_num"]:
        cset(train, arg, args[arg])

    # Visualization: alias the long option names to the short type names
    args.vis_imgs = args.vis_images
    args.vis_ltnts = args.vis_latents
    vis_types = ["imgs", "ltnts", "maps", "layer_maps", "interpolations", "noise_var", "style_mix"]
    # Set of all the visualization types that were switched on
    vis.vis_types = {arg for arg in vis_types if args["vis_{}".format(arg)]}

    # Visualization option name -> command-line option name
    vis_args = {
        "attention": "transformer",
        "grid": "vis_grid",
        "num": "vis_num",
        "rich_num": "vis_rich_num",
        "section_size": "vis_section_size",
        "intrp_density": "interpolation_density",
        # "intrp_per_component": "interpolation_per_component",
        "alpha": "blending_alpha",
    }
    for arg, cmd_arg in vis_args.items():
        cset(vis, arg, args[cmd_arg])

    # Networks architecture
    cset(cG.args, "architecture", args.g_arch)
    cset(cD.args, "architecture", args.d_arch)
    cset(cG.args, "tanh", args.tanh)

    # Latent sizes: with several components the latent is split evenly among them
    if args.components_num > 1:
        if not (args.transformer or args.kgan):
            misc.error(
                "--components-num > 1 but the model is not using components. " +
                "Either add --transformer for GANsformer or --kgan for k-GAN.")
        args.latent_size = int(args.latent_size / args.components_num)

    cD.args.latent_size = cG.args.latent_size = cG.args.dlatent_size = args.latent_size
    cset([cG.args, cD.args, vis], "components_num", args.components_num)

    # Mapping network
    for arg in ["layersnum", "lrmul", "dim", "resnet", "shared_dim"]:
        field = "mapping_{}".format(arg)
        cset(cG.args, field, args[field])

    # StyleGAN settings
    for arg in ["style", "latent_stem", "fused_modconv", "local_noise"]:
        cset(cG.args, arg, args[arg])
    cD.args.mbstd_group_size = args.minibatch_std_size

    # GANsformer
    cset(cG.args, "transformer", args.transformer)
    cset(cD.args, "transformer", args.d_transformer)

    args.norm = args.normalize
    for arg in ["norm", "integration", "ltnt_gate", "img_gate", "iterative",
                "kmeans", "kmeans_iters", "mapping_ltnt2ltnt"]:
        cset(cG.args, arg, args[arg])

    for arg in ["use_pos", "num_heads"]:
        cset([cG.args, cD.args], arg, args[arg])

    # Positional encoding
    for arg in ["dim", "init", "directions_num"]:
        field = "pos_{}".format(arg)
        cset([cG.args, cD.args], field, args[field])

    # k-GAN
    for arg in ["layer", "type", "same"]:
        field = "merge_{}".format(arg)
        cset(cG.args, field, args[field])
    cset([cG.args, train], "merge", args.kgan)

    if args.kgan and args.transformer:
        misc.error("Either have --transformer for GANsformer or --kgan for k-GAN, not both")

    # Attention: generator options are prefixed g_, discriminator options d_
    for arg in ["start_res", "end_res", "ltnt2ltnt", "img2img"]:  # , "local_attention"
        cset(cG.args, arg, args["g_{}".format(arg)])
        cset(cD.args, arg, args["d_{}".format(arg)])
    cset(cG.args, "img2ltnt", args.g_img2ltnt)
    # cset(cD.args, "ltnt2img", args.d_ltnt2img)

    # Mixing and dropout
    for arg in ["style_mixing", "component_mixing", "component_dropout", "attention_dropout"]:
        cset(cG.args, arg, args[arg])

    # Loss and regularization: loss option name -> command-line option name
    gloss_args = {
        "loss_type": "g_loss",
        "reg_weight": "g_reg_weight",
        # "pathreg": "pathreg",
    }
    dloss_args = {
        "loss_type": "d_loss",
        "reg_type": "d_reg",
        "gamma": "gamma",
    }
    for arg, cmd_arg in gloss_args.items():
        cset(cG.loss_args, arg, args[cmd_arg])
    for arg, cmd_arg in dloss_args.items():
        cset(cD.loss_args, arg, args[cmd_arg])

    ##### Experiments management:
    # Whenever we start a new experiment we store its result in a directory named 'args.expname-000'.
    # When we rerun a training or evaluation command it restores the model from that directory by default.
    # If we wish to restart the model training, we can set --restart and then we will store data in a new
    # directory: 'args.expname-001' after the first restart, then 'args.expname-002' after the second, etc.

    # Find the latest run of this experiment. Only directories named exactly
    # '<expname>-<digits>' count, so a sibling such as '<expname>-foo-000' or
    # '<expname>-backup' is ignored, and ids are compared numerically.
    run_ids = []
    for path in glob.glob("{}/{}-*".format(args.result_dir, args.expname)):
        suffix = path.split("-")[-1]
        expected = "{}-{}".format(os.path.basename(args.expname), suffix)
        if suffix.isdigit() and os.path.basename(path) == expected:
            run_ids.append(int(suffix))
    run_id = max(run_ids) if run_ids else 0
    # If restart, then work over a new directory
    if args.restart:
        run_id += 1

    run_name = "{}-{:03d}".format(args.expname, run_id)
    train.printname = "{} ".format(misc.bold(args.expname))

    snapshot, kimg, resume = None, 0, False
    pkls = sorted(glob.glob("{}/{}/network*.pkl".format(args.result_dir, run_name)))
    # Load a particular snapshot if specified (the literal string "None" counts as unset)
    if args.pretrained_pkl is not None and args.pretrained_pkl != "None":
        if args.pretrained_pkl.startswith("gdrive"):
            # Catalog entry: must be one of the known pretrained networks
            if args.pretrained_pkl not in pretrained_networks.gdrive_urls:
                misc.error(
                    "--pretrained_pkl {} not available in the catalog (see pretrained_networks.py)".format(
                        args.pretrained_pkl))
            snapshot = args.pretrained_pkl
        else:
            # Local file (glob pattern allowed); presumably misc.error aborts
            # the run - verify against its definition
            matches = glob.glob(args.pretrained_pkl)
            if not matches:
                misc.error("--pretrained_pkl {} does not match any file".format(args.pretrained_pkl))
            snapshot = matches[0]
            # Soft links support
            if os.path.islink(snapshot):
                snapshot = os.readlink(snapshot)

        # Extract the training step from the snapshot name when it ends with
        # '-<number>.<ext>'; otherwise keep kimg at 0
        try:
            kimg = int(snapshot.split("-")[-1].split(".")[0])
        except ValueError:
            pass

    # Otherwise, continue from the latest snapshot in the run directory
    elif pkls:
        snapshot = pkls[-1]
        kimg = int(snapshot.split("-")[-1].split(".")[0])
        resume = True

    if snapshot:
        misc.log("Resuming {}, from {}, kimg {}".format(run_name, snapshot, kimg), "white")
        train.resume_pkl = snapshot
        train.resume_kimg = kimg
    else:
        misc.log("Start model training from scratch", "white")

    # Run environment configuration
    sc.run_dir_root = args.result_dir
    sc.run_desc = args.expname
    sc.run_id = run_id
    sc.run_name = run_name
    sc.submit_target = dnnlib.SubmitTarget.LOCAL
    sc.local.do_not_copy_source_files = True

    kwargs = EasyDict(train)
    kwargs.update(cG=cG, cD=cD)
    kwargs.update(
        dataset_args=dataset_args,
        vis_args=vis,
        sched_args=sched,
        grid_args=grid,
        metric_arg_list=metrics,
        tf_config=tf_config,
    )
    kwargs.submit_config = copy.deepcopy(sc)
    kwargs.resume = resume
    kwargs.load_config = args.reload

    dnnlib.submit_run(**kwargs)