Beispiel #1
0
def run(**args):
    """Build the run configuration from command-line options and submit it.

    Wraps ``args`` in an ``EasyDict`` and distributes its values over the
    option groups handed to ``dnnlib.submit_run``: training-loop options
    (``train``), schedule options (``sched``), visualization options
    (``vis``), snapshot-grid options (``grid``), generator/discriminator
    network options (``cG``/``cD``), dataset options and the submit
    configuration (``sc``). It also resolves the experiment run directory
    and the snapshot to resume from.

    Side effects: sets ``CUDA_VISIBLE_DEVICES`` when ``--gpus`` is non-empty,
    prints status messages, and calls ``exit()`` when the latent size rounds
    down to 0 or when ``component_num > 1`` without attention/merge.

    Args:
        **args: Parsed command-line options; read both as attributes and as
            keys after being wrapped in an ``EasyDict``.

    NOTE(review): ``cset`` and ``set_net`` are defined outside this function;
    ``cset`` is presumably a conditional setter that copies a value onto one
    or several config dicts -- confirm against its definition.
    """
    args      = EasyDict(args)
    train     = EasyDict(run_func_name = "training.training_loop.training_loop") # training loop options
    sched     = EasyDict()                                                       # TrainingSchedule options
    vis       = EasyDict()                                                       # visualize.eval() options
    grid      = EasyDict(size = "1080p", layout = "random")                      # setup_snapshot_img_grid() options
    sc        = dnnlib.SubmitConfig()                                            # dnnlib.submit_run() options

    # Environment configuration
    tf_config = {
        "rnd.np_random_seed": 1000,
        "allow_soft_placement": True,
        "gpu_options.per_process_gpu_memory_fraction": 1.0
    }
    # Fall back to a single GPU when --gpus is empty, so that num_gpus is
    # always bound (it used to be undefined in that case).
    num_gpus = 1
    if args.gpus != "":
        num_gpus = len(args.gpus.split(","))
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
    assert num_gpus in [1, 2, 4, 8], "the number of GPUs must be 1, 2, 4 or 8"
    sc.num_gpus = num_gpus

    # Networks configuration
    cG = set_net("G", reg_interval = 4)
    cD = set_net("D", reg_interval = 16)

    # Dataset configuration
    # Known datasets override the --ratio option with a fixed ratio
    ratios = {
        "clevr": 0.75,
        "lsun-bedrooms": 0.72,
        "cityscapes": 0.5,
        "ffhq": 1.0
    }
    args.ratio = ratios.get(args.dataset, args.ratio)
    dataset_args = EasyDict(tfrecord_dir = args.dataset, max_imgs = args.max_images, ratio = args.ratio,
        num_threads = args.num_threads)
    for arg in ["data_dir", "mirror_augment", "total_kimg"]:
        cset(train, arg, args[arg])

    # Training and Optimizations configuration
    for arg in ["eval", "train", "recompile", "last_snapshots"]:
        cset(train, arg, args[arg])

    # Round sizes down to a multiple of the minibatch size (and the latent
    # size down to a multiple of the number of components) for validity
    args.batch_size -= args.batch_size % args.minibatch_size
    args.minibatch_std_size -= args.minibatch_std_size % args.minibatch_size
    args.latent_size -= args.latent_size % args.component_num
    if args.latent_size == 0:
        print(bcolored("Error: latent-size is too small. Must best a multiply of component-num.", "red"))
        exit()

    # Schedule option name -> command-line option name
    sched_args = {
        "G_lrate": "g_lr",
        "D_lrate": "d_lr",
        "minibatch_size": "batch_size",
        "minibatch_gpu": "minibatch_size"
    }
    for arg, cmd_arg in sched_args.items():
        cset(sched, arg, args[cmd_arg])
    cset(train, "clip", args.clip)

    # Logging and metrics configuration
    metrics = [metric_defaults[x] for x in args.metrics]
    cset(cG.args, "truncation_psi", args.truncation_psi)
    for arg in ["summarize", "keep_samples"]:
        cset(train, arg, args[arg])

    # Visualization
    # Alias the long option names to the short visualization type names
    args.imgs = args.images
    args.ltnts = args.latents
    vis_types = ["imgs", "ltnts", "maps", "layer_maps", "interpolations", "noise_var", "style_mix"]
    # Set of all the visualization types that were switched on
    vis.vis_types = {arg for arg in vis_types if args[arg]}

    # Visualization option name -> command-line option name
    vis_args = {
        "grid": "vis_grid",
        "num": "vis_num",
        "rich_num": "vis_rich_num",
        "section_size": "vis_section_size",
        "intrp_density": "intrpolation_density",
        "intrp_per_component": "intrpolation_per_component",
        "alpha": "blending_alpha"
    }
    for arg, cmd_arg in vis_args.items():
        cset(vis, arg, args[cmd_arg])

    # Networks architecture
    cset(cG.args, "architecture", args.g_arch)
    cset(cD.args, "architecture", args.d_arch)
    cset([cG.args, cD.args], "resnet_mlp", args.resnet_mlp)
    cset(cG.args, "tanh", args.tanh)

    # Latent sizes
    if args.component_num > 1:
        if not (args.attention or args.merge):
            print(bcolored("Error: component-num > 1 but the model is not using components.", "red"))
            print(bcolored("Either add --attention for GANsformer or --merge for k-GAN).", "red"))
            exit()
        # The total latent size is split evenly between the components
        args.latent_size = int(args.latent_size / args.component_num)
    cD.args.latent_size = cG.args.latent_size = cG.args.dlatent_size = args.latent_size
    cset([cG.args, cD.args, train, vis], "component_num", args.component_num)

    # Mapping network
    for arg in ["layersnum", "lrmul", "dim", "shared"]:
        cset(cG.args, arg, args["mapping_{}".format(arg)])

    # StyleGAN settings
    for arg in ["style", "latent_stem", "fused_modconv", "local_noise"]:
        cset(cG.args, arg, args[arg])
    cD.args.mbstd_group_size = args.minibatch_std_size

    # GANsformer
    # NOTE(review): this reads args.transformer while the component check
    # above reads args.attention -- confirm both options exist.
    cset([cG.args, train], "attention", args.transformer)
    cset(cD.args, "attention", args.d_transformer)
    cset([cG.args, cD.args], "num_heads", args.num_heads)

    args.norm = args.normalize
    for arg in ["norm", "integration", "ltnt_gate", "img_gate", "kmeans",
                "kmeans_iters", "asgn_direct", "mapping_ltnt2ltnt"]:
        cset(cG.args, arg, args[arg])

    for arg in ["attention_inputs", "use_pos"]:
        cset([cG.args, cD.args], arg, args[arg])

    # Positional encoding
    for arg in ["dim", "init", "directions_num"]:
        field = "pos_{}".format(arg)
        cset([cG.args, cD.args], field, args[field])

    # k-GAN
    for arg in ["layer", "type", "channelwise"]:
        field = "merge_{}".format(arg)
        cset(cG.args, field, args[field])
    cset([cG.args, train], "merge", args.merge)

    # Attention
    for arg in ["start_res", "end_res", "ltnt2ltnt", "img2img", "local_attention"]:
        cset(cG.args, arg, args["g_{}".format(arg)])
        cset(cD.args, arg, args["d_{}".format(arg)])
    cset(cG.args, "img2ltnt", args.g_img2ltnt)
    cset(cD.args, "ltnt2img", args.d_ltnt2img)

    # Mixing and dropout
    for arg in ["style_mixing", "component_mixing", "component_dropout", "attention_dropout"]:
        cset(cG.args, arg, args[arg])

    # Loss and regularization (loss option name -> command-line option name)
    gloss_args = {
        "loss_type": "g_loss",
        "reg_weight": "g_reg_weight",
        "pathreg": "pathreg",
    }
    dloss_args = {
        "loss_type": "d_loss",
        "reg_type": "d_reg",
        "gamma": "gamma"
    }
    for arg, cmd_arg in gloss_args.items():
        cset(cG.loss_args, arg, args[cmd_arg])
    for arg, cmd_arg in dloss_args.items():
        cset(cD.loss_args, arg, args[cmd_arg])

    ##### Experiments management:
    # Whenever we start a new experiment we store its result in a directory named 'args.expname:000'.
    # When we rerun a training or evaluation command it restores the model from that directory by default.
    # If we wish to restart the model training, we can set --restart and then we will store data in a new
    # directory: 'args.expname:001' after the first restart, then 'args.expname:002' after the second, etc.

    # Find the latest directory that matches the experiment; a brand-new
    # experiment has no directory yet and starts at run id 0
    exp_dirs = sorted(glob.glob("{}/{}:*".format(args.result_dir, args.expname)))
    run_id = 0
    if exp_dirs:
        run_id = int(exp_dirs[-1].split(":")[-1])
    # If restart, then work over a new directory
    if args.restart:
        run_id += 1

    run_name = "{}:{:03d}".format(args.expname, run_id)
    train.printname = "{} ".format(misc.bold(args.expname))

    snapshot, kimg, resume = None, 0, False
    pkls = sorted(glob.glob("{}/{}/network*.pkl".format(args.result_dir, run_name)))
    # Load a particular snapshot if specified
    if args.pretrained_pkl:
        # Soft links support
        snapshot = glob.glob(args.pretrained_pkl)[0]
        if os.path.islink(snapshot):
            snapshot = os.readlink(snapshot)

        # Extract the training step from the snapshot name if it has one
        # (the text between the last "-" and the following "."); otherwise
        # keep kimg at 0
        try:
            kimg = int(snapshot.split("-")[-1].split(".")[0])
        except ValueError:
            pass

    # Find latest snapshot in the directory
    elif len(pkls) > 0:
        snapshot = pkls[-1]
        kimg = int(snapshot.split("-")[-1].split(".")[0])
        resume = True

    if snapshot:
        print(misc.bcolored("Resuming {}, kimg {}".format(snapshot, kimg), "white"))
        train.resume_pkl = snapshot
        train.resume_kimg = kimg
    else:
        print(misc.bcolored("Start model training from scratch.", "white"))

    # Run environment configuration
    sc.run_dir_root = args.result_dir
    sc.run_desc = args.expname
    sc.run_id = run_id
    sc.run_name = run_name
    sc.submit_target = dnnlib.SubmitTarget.LOCAL
    sc.local.do_not_copy_source_files = True

    kwargs = EasyDict(train)
    kwargs.update(cG = cG, cD = cD)
    kwargs.update(dataset_args = dataset_args, vis_args = vis, sched_args = sched, grid_args = grid, metric_arg_list = metrics, tf_config = tf_config)
    kwargs.submit_config = copy.deepcopy(sc)
    kwargs.resume = resume
    # If reload new options from the command line, no need to load the original configuration file
    kwargs.load_config = not args.reload

    dnnlib.submit_run(**kwargs)
Beispiel #2
0
def run(**args):
    """Build the run configuration from command-line options and submit it.

    Wraps ``args`` in an ``EasyDict``, applies the ``--gansformer-default``
    and ``--baseline`` presets, and distributes the option values over the
    groups handed to ``dnnlib.submit_run``: training-loop options
    (``train``), schedule options (``sched``), visualization options
    (``vis``), snapshot-grid options (``grid``), generator/discriminator
    network options (``cG``/``cD``), dataset options and the submit
    configuration (``sc``). It also resolves the experiment run directory
    and the snapshot to resume from.

    Side effects: sets ``CUDA_VISIBLE_DEVICES`` when ``--gpus`` is non-empty
    and reports problems through ``misc.log`` / ``misc.error``.

    Args:
        **args: Parsed command-line options; read both as attributes and as
            keys after being wrapped in an ``EasyDict``.

    NOTE(review): ``cset``, ``nset``, ``set_net`` and ``misc.error`` are
    defined outside this function. ``nset`` presumably sets an option only
    when the user did not provide it, and ``misc.error`` presumably aborts
    the program -- confirm against their definitions.
    """
    args = EasyDict(args)
    train = EasyDict(run_func_name="training.training_loop.training_loop"
                     )  # training loop options
    sched = EasyDict()  # TrainingSchedule options
    vis = EasyDict()  # visualize.eval() options
    grid = EasyDict(size="1080p",
                    layout="random")  # setup_snapshot_img_grid() options
    sc = dnnlib.SubmitConfig()  # dnnlib.submit_run() options

    # If the flag is specified without arguments (--arg), set to True
    for arg in [
            "summarize", "keep_samples", "style", "fused_modconv",
            "local_noise"
    ]:
        if args[arg] is None:
            args[arg] = True

    if not args.train and not args.eval:
        misc.log(
            "Warning: Neither --train nor --eval are provided. Therefore, we only print network shapes",
            "red")

    if args.gansformer_default:
        task = args.dataset
        # Use the pretrained snapshot for this dataset only if the catalog has one
        pretrained = "gdrive:{}-snapshot.pkl".format(task)
        if pretrained not in pretrained_networks.gdrive_urls:
            pretrained = None

        nset(args, "recompile", pretrained is not None)
        nset(args, "pretrained_pkl", pretrained)
        nset(args, "mirror_augment", task in ["cityscapes", "ffhq"])

        nset(args, "transformer", True)
        nset(args, "components_num", {"clevr": 8}.get(task, 16))
        nset(args, "latent_size", {"clevr": 128}.get(task, 512))

        nset(args, "normalize", "layer")
        nset(args, "integration", "mul")
        nset(args, "kmeans", True)
        nset(args, "use_pos", True)
        nset(args, "mapping_ltnt2ltnt", task != "clevr")
        nset(args, "style", task != "clevr")

        nset(args, "g_arch", "resnet")
        nset(args, "mapping_resnet", True)

        # The dataset is called "cityscapes" everywhere else in this
        # function, so the "cities" key alone could never match it; both
        # spellings are accepted here.
        gammas = {
            "ffhq": 10,
            "cities": 20,
            "cityscapes": 20,
            "clevr": 40,
            "bedrooms": 100
        }
        nset(args, "gamma", gammas.get(task, 10))

    if args.baseline == "GAN":
        nset(args, "style", False)
        nset(args, "latent_stem", True)

    if args.baseline == "SAGAN":
        nset(args, "style", False)
        nset(args, "latent_stem", True)
        nset(args, "g_img2img", 5)

    if args.baseline == "kGAN":
        nset(args, "kgan", True)
        nset(args, "merge_layer", 5)
        nset(args, "merge_type", "softmax")
        nset(args, "components_num", 8)

    # Environment configuration
    tf_config = {
        "rnd.np_random_seed": 1000,
        "allow_soft_placement": True,
        "gpu_options.per_process_gpu_memory_fraction": 1.0
    }
    # Fall back to a single GPU when --gpus is empty, so that num_gpus is
    # always bound (it used to be undefined in that case).
    num_gpus = 1
    if args.gpus != "":
        num_gpus = len(args.gpus.split(","))
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
    assert num_gpus in [1, 2, 4, 8], "the number of GPUs must be 1, 2, 4 or 8"
    sc.num_gpus = num_gpus

    # Networks configuration
    cG = set_net("G", reg_interval=4)
    cD = set_net("D", reg_interval=16)

    # Dataset configuration
    # For bedrooms, we choose the most common ratio in the
    # dataset and crop the other images into that ratio.
    ratios = {
        "clevr": 0.75,
        "bedrooms": 188 / 256,
        "cityscapes": 0.5,
        "ffhq": 1.0
    }
    args.ratio = ratios.get(args.dataset, args.ratio)
    dataset_args = EasyDict(tfrecord_dir=args.dataset,
                            max_imgs=args.train_images_num,
                            num_threads=args.num_threads)
    for arg in ["data_dir", "mirror_augment", "total_kimg", "ratio"]:
        cset(train, arg, args[arg])

    # Training and Optimizations configuration
    for arg in ["eval", "train", "recompile", "last_snapshots"]:
        cset(train, arg, args[arg])

    # Round sizes down to a multiple of the minibatch size (and the latent
    # size down to a multiple of the number of components) for validity
    args.batch_size -= args.batch_size % args.minibatch_size
    args.minibatch_std_size -= args.minibatch_std_size % args.minibatch_size
    args.latent_size -= args.latent_size % args.components_num
    if args.latent_size == 0:
        misc.error(
            "--latent-size is too small. Must best a multiply of components-num"
        )

    # Schedule option name -> command-line option name
    sched_args = {
        "G_lrate": "g_lr",
        "D_lrate": "d_lr",
        "minibatch_size": "batch_size",
        "minibatch_gpu": "minibatch_size"
    }
    for arg, cmd_arg in sched_args.items():
        cset(sched, arg, args[cmd_arg])
    cset(train, "clip", args.clip)

    # Logging and metrics configuration
    metrics = [metric_defaults[x] for x in args.metrics]

    cset(cG.args, "truncation_psi", args.truncation_psi)
    for arg in ["keep_samples", "num_heads"]:
        cset(vis, arg, args[arg])
    for arg in ["summarize", "eval_images_num"]:
        cset(train, arg, args[arg])

    # Visualization
    # Alias the long option names to the short visualization type names
    args.vis_imgs = args.vis_images
    args.vis_ltnts = args.vis_latents
    vis_types = [
        "imgs", "ltnts", "maps", "layer_maps", "interpolations", "noise_var",
        "style_mix"
    ]
    # Set of all the visualization types that were switched on
    vis.vis_types = {arg for arg in vis_types if args["vis_{}".format(arg)]}

    # Visualization option name -> command-line option name
    vis_args = {
        "attention": "transformer",
        "grid": "vis_grid",
        "num": "vis_num",
        "rich_num": "vis_rich_num",
        "section_size": "vis_section_size",
        "intrp_density": "interpolation_density",
        # "intrp_per_component": "interpolation_per_component",
        "alpha": "blending_alpha"
    }
    for arg, cmd_arg in vis_args.items():
        cset(vis, arg, args[cmd_arg])

    # Networks architecture
    cset(cG.args, "architecture", args.g_arch)
    cset(cD.args, "architecture", args.d_arch)
    cset(cG.args, "tanh", args.tanh)

    # Latent sizes
    if args.components_num > 1:
        if not (args.transformer or args.kgan):
            misc.error(
                "--components-num > 1 but the model is not using components. "
                +
                "Either add --transformer for GANsformer or --kgan for k-GAN.")

        # The total latent size is split evenly between the components
        args.latent_size = int(args.latent_size / args.components_num)
    cD.args.latent_size = cG.args.latent_size = cG.args.dlatent_size = args.latent_size
    cset([cG.args, cD.args, vis], "components_num", args.components_num)

    # Mapping network
    for arg in ["layersnum", "lrmul", "dim", "resnet", "shared_dim"]:
        field = "mapping_{}".format(arg)
        cset(cG.args, field, args[field])

    # StyleGAN settings
    for arg in ["style", "latent_stem", "fused_modconv", "local_noise"]:
        cset(cG.args, arg, args[arg])
    cD.args.mbstd_group_size = args.minibatch_std_size

    # GANsformer
    cset(cG.args, "transformer", args.transformer)
    cset(cD.args, "transformer", args.d_transformer)

    args.norm = args.normalize
    for arg in [
            "norm", "integration", "ltnt_gate", "img_gate", "iterative",
            "kmeans", "kmeans_iters", "mapping_ltnt2ltnt"
    ]:
        cset(cG.args, arg, args[arg])

    for arg in ["use_pos", "num_heads"]:
        cset([cG.args, cD.args], arg, args[arg])

    # Positional encoding
    for arg in ["dim", "init", "directions_num"]:
        field = "pos_{}".format(arg)
        cset([cG.args, cD.args], field, args[field])

    # k-GAN
    for arg in ["layer", "type", "same"]:
        field = "merge_{}".format(arg)
        cset(cG.args, field, args[field])
    cset([cG.args, train], "merge", args.kgan)

    if args.kgan and args.transformer:
        misc.error(
            "Either have --transformer for GANsformer or --kgan for k-GAN, not both"
        )

    # Attention
    for arg in ["start_res", "end_res", "ltnt2ltnt",
                "img2img"]:  # , "local_attention"
        cset(cG.args, arg, args["g_{}".format(arg)])
        cset(cD.args, arg, args["d_{}".format(arg)])
    cset(cG.args, "img2ltnt", args.g_img2ltnt)
    # cset(cD.args, "ltnt2img", args.d_ltnt2img)

    # Mixing and dropout
    for arg in [
            "style_mixing", "component_mixing", "component_dropout",
            "attention_dropout"
    ]:
        cset(cG.args, arg, args[arg])

    # Loss and regularization (loss option name -> command-line option name)
    gloss_args = {
        "loss_type": "g_loss",
        "reg_weight": "g_reg_weight",
        # "pathreg": "pathreg",
    }
    dloss_args = {"loss_type": "d_loss", "reg_type": "d_reg", "gamma": "gamma"}
    for arg, cmd_arg in gloss_args.items():
        cset(cG.loss_args, arg, args[cmd_arg])
    for arg, cmd_arg in dloss_args.items():
        cset(cD.loss_args, arg, args[cmd_arg])

    ##### Experiments management:
    # Whenever we start a new experiment we store its result in a directory named 'args.expname-000'.
    # When we rerun a training or evaluation command it restores the model from that directory by default.
    # If we wish to restart the model training, we can set --restart and then we will store data in a new
    # directory: 'args.expname-001' after the first restart, then 'args.expname-002' after the second, etc.

    # Find the latest directory that matches the experiment; a brand-new
    # experiment has no directory yet and starts at run id 0
    exp_dir = sorted(glob.glob("{}/{}-*".format(args.result_dir,
                                                args.expname)))
    run_id = 0
    if len(exp_dir) > 0:
        run_id = int(exp_dir[-1].split("-")[-1])
    # If restart, then work over a new directory
    if args.restart:
        run_id += 1

    run_name = "{}-{:03d}".format(args.expname, run_id)
    train.printname = "{} ".format(misc.bold(args.expname))

    snapshot, kimg, resume = None, 0, False
    pkls = sorted(
        glob.glob("{}/{}/network*.pkl".format(args.result_dir, run_name)))
    # Load a particular snapshot if specified
    if args.pretrained_pkl is not None and args.pretrained_pkl != "None":
        if args.pretrained_pkl.startswith("gdrive"):
            # Catalog entries are passed on by name, not resolved on disk
            if args.pretrained_pkl not in pretrained_networks.gdrive_urls:
                misc.error(
                    "--pretrained_pkl {} not available in the catalog (see pretrained_networks.py)"
                    .format(args.pretrained_pkl))

            snapshot = args.pretrained_pkl
        else:
            # Soft links support
            snapshot = glob.glob(args.pretrained_pkl)[0]
            if os.path.islink(snapshot):
                snapshot = os.readlink(snapshot)

        # Extract the training step from the snapshot name if it has one
        # (the text between the last "-" and the following "."); otherwise
        # keep kimg at 0
        try:
            kimg = int(snapshot.split("-")[-1].split(".")[0])
        except ValueError:
            pass

    # Find latest snapshot in the directory
    elif len(pkls) > 0:
        snapshot = pkls[-1]
        kimg = int(snapshot.split("-")[-1].split(".")[0])
        resume = True

    if snapshot:
        misc.log(
            "Resuming {}, from {}, kimg {}".format(run_name, snapshot, kimg),
            "white")
        train.resume_pkl = snapshot
        train.resume_kimg = kimg
    else:
        misc.log("Start model training from scratch", "white")

    # Run environment configuration
    sc.run_dir_root = args.result_dir
    sc.run_desc = args.expname
    sc.run_id = run_id
    sc.run_name = run_name
    sc.submit_target = dnnlib.SubmitTarget.LOCAL
    sc.local.do_not_copy_source_files = True

    kwargs = EasyDict(train)
    kwargs.update(cG=cG, cD=cD)
    kwargs.update(dataset_args=dataset_args,
                  vis_args=vis,
                  sched_args=sched,
                  grid_args=grid,
                  metric_arg_list=metrics,
                  tf_config=tf_config)
    kwargs.submit_config = copy.deepcopy(sc)
    kwargs.resume = resume
    # NOTE(review): passes --reload through unchanged; confirm this is the
    # intended polarity for load_config.
    kwargs.load_config = args.reload

    dnnlib.submit_run(**kwargs)