Example #1
0
def create_from_image_folders(datadir, shuffle=True, size=None):
    """Export a labelled image dataset from a folder of per-class subfolders.

    Every subdirectory reported by ``os.walk(datadir)`` is scanned with
    ``img_list``; its images receive the subdirectory's index among its
    siblings as the class label. Images and one-hot labels are written
    through a ``TFRecordExporter``.

    Args:
        datadir: Existing directory that contains the class subfolders.
        shuffle: If true, images are written in the order returned by
            ``tfr.choose_shuffled_order()``; otherwise in discovery order.
        size: Passed through unchanged to ``tfr.add_image``.

    Returns:
        Tuple ``(tfr.tfr_file, number_of_images)``.

    Raises:
        AssertionError: ``datadir`` is not a directory, no images were
            found, or the first image has an unexpected channel count.
    """
    assert os.path.isdir(datadir)
    imgs = []
    labels = []
    # NOTE(review): os.walk recurses, so nested folders are labelled by their
    # position among their own siblings -- confirm a flat layout is expected.
    for root, subdirs, _files in os.walk(datadir):
        for i, subdir in enumerate(subdirs):
            tmp_list = img_list(os.path.join(root, subdir))
            imgs.extend(tmp_list)
            labels.extend([i] * len(tmp_list))

    # Validate before touching `labels`: np.max() on an empty array raises
    # ValueError, which would otherwise hide this message.
    assert len(imgs) > 0, ' No input images found!'

    labels = np.array(labels)
    num_labels = np.max(labels) + 1
    onehot = np.zeros((labels.size, num_labels), dtype=np.float32)
    onehot[np.arange(labels.size), labels] = 1.

    # The first image decides the channel count for the whole dataset.
    sample_img = np.asarray(PIL.Image.open(imgs[0]))
    sample_shape = sample_img.shape
    channels = sample_shape[2] if sample_img.ndim == 3 else 1
    assert channels in [1, 3, 4], ' Weird color dim: %d' % channels
    print(' Making dataset ..', datadir, sample_shape,
          '%d labels' % num_labels)
    jpg = channels < 4
    if jpg is True: print(' Loading JPG as is!')

    with TFRecordExporter(datadir, len(imgs)) as tfr:
        order = tfr.choose_shuffled_order() if shuffle else np.arange(
            len(imgs))
        pbar = ProgressBar(order.size)
        for idx in range(order.size):
            img_path = imgs[order[idx]]
            tfr.add_image(img_path, jpg=jpg, size=size)
            pbar.upd()
        # Labels are stored in the same (possibly shuffled) order as images.
        tfr.add_labels(onehot[order])
    return tfr.tfr_file, len(imgs)
Example #2
0
def copy_and_crop_or_pad_trainables(src_net, tgt_net) -> None:
    """Copy trainables from ``src_net`` into ``tgt_net``, resizing mismatches.

    Variables are paired positionally: the i-th key of
    ``src_net.trainables`` goes with the i-th key of ``tgt_net.trainables``.
    For a pair whose shapes differ:

    * names containing ``'Dense'`` are centre-cropped along axis 0 when the
      source is larger there, otherwise padded with
      ``pad_symm_np(update, target_shape)``;
    * all other variables are centre-cropped along axes 2 and 3 where the
      source is larger, then padded with
      ``pad_symm_np(update, target_shape[2:])`` if either of those axes is
      smaller than the target.

    Resized values are written with ``tgt_net.set_var``; all remaining pairs
    are copied in one ``tfutil.set_vars(tfutil.run(...))`` call.

    Raises:
        SystemExit: with status 1 if a non-Dense pair has too few dimensions
            to be cropped along axes 2 and 3.
    """
    names = list(zip(src_net.trainables.keys(), tgt_net.trainables.keys()))

    resized = set()  # source names already written via set_var
    pbar = ProgressBar(len(names))
    for pair in names:
        source_name, target_name = pair
        x = src_net.get_var(source_name)
        y = tgt_net.get_var(target_name)
        source_shape = x.shape
        target_shape = y.shape
        if source_shape != target_shape:
            update = x
            if 'Dense' in source_name:
                if source_shape[0] > target_shape[0]:
                    start = (source_shape[0] - target_shape[0]) // 2
                    end = start + target_shape[0]
                    update = update[start:end, :]
                else:
                    update = pad_symm_np(update, target_shape)
            else:
                try:
                    if source_shape[2] > target_shape[2]:
                        start = (source_shape[2] - target_shape[2]) // 2
                        end = start + target_shape[2]
                        update = update[:, :, start:end, :]
                    if source_shape[3] > target_shape[3]:
                        start = (source_shape[3] - target_shape[3]) // 2
                        end = start + target_shape[3]
                        update = update[:, :, :, start:end]
                except IndexError:
                    # Shapes with fewer than four axes cannot be a matching
                    # pair for this branch.
                    print(' Wrong var pair?', source_name, source_shape,
                          target_name, target_shape)
                    exit(1)

                if source_shape[2] < target_shape[2] or source_shape[
                        3] < target_shape[3]:
                    update = pad_symm_np(update, target_shape[2:])

            tgt_net.set_var(target_name, update)
            resized.add(source_name)
        pbar.upd(pair)

    # Everything that did not need resizing is copied in a single batch.
    weights_to_copy = {
        tgt_net.vars[pair[1]]: src_net.vars[pair[0]]
        for pair in names if pair[0] not in resized
    }
    tfutil.set_vars(tfutil.run(weights_to_copy))
def project_image(proj, targets, work_dir, resolution, num_snapshots):
    """Run a projector to completion, recording a video, snapshots and dlatents.

    All outputs share the prefix ``work_dir/<basename(work_dir)>``:
    ``.avi`` (MJPG, 25 fps, one frame per step via ``write_video_frame``),
    ``.jpg`` (the target grid), ``-NNNN.jpg`` (image grids at the snapshot
    steps) and ``-NNNN.npy`` (the dlatents from ``proj.get_dlatents()``
    after the last step).

    Args:
        proj: Projector exposing ``num_steps``, ``start``, ``step``,
            ``get_cur_step``, ``get_images`` and ``get_dlatents``.
        targets: Target images passed to ``proj.start`` and saved as a grid.
        work_dir: Output directory; also supplies the file name prefix.
        resolution: Frame size handed to ``cv2.VideoWriter``.
        num_snapshots: Number of evenly spaced steps at which to save grids.
    """
    filename = osp.join(work_dir, basename(work_dir))
    video_out = cv2.VideoWriter(filename + '.avi', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 25, resolution)
    # The writer holds an open file; release it even if a step fails.
    try:
        # Steps counted back from the final one, so the last step is always included.
        snapshot_steps = set(proj.num_steps - np.linspace(0, proj.num_steps, num_snapshots, endpoint=False, dtype=int))
        misc.save_image_grid(targets, filename + '.jpg', drange=[-1,1])
        proj.start(targets)
        pbar = ProgressBar(proj.num_steps)
        while proj.get_cur_step() < proj.num_steps:
            proj.step()
            write_video_frame(proj, video_out)
            if proj.get_cur_step() in snapshot_steps:
                misc.save_image_grid(proj.get_images(), filename + '-%04d.jpg' % proj.get_cur_step(), drange=[-1,1])
            pbar.upd()

        dlats = proj.get_dlatents()
        np.save(filename + '-%04d.npy' % proj.get_cur_step(), dlats)
    finally:
        video_out.release()
Example #4
0
def copy_and_fill_trainables(src_net, tgt_net) -> None:  # model => conditional
    """Copy same-named trainables from ``src_net`` to ``tgt_net``, fitting shapes.

    Only names present in both networks are handled. When a pair's shapes
    differ (they must have the same rank):

    * if any target axis is smaller, the source is cut to the target size
      along its first two axes (first axis only for 1-D shapes);
    * otherwise, if any target axis is larger, the source is repeated with
      ``np.tile`` by ``tgt_shape[i] // src_shape[i]`` along each axis.

    Fitted values are written with ``tgt_net.set_var``; the remaining
    variables are copied in one ``tfutil.set_vars(tfutil.run(...))`` call.

    Raises:
        AssertionError: a pair of shapes has different rank.
    """
    tgt_names = set(tgt_net.trainables.keys())
    train_vars = [
        name for name in src_net.trainables.keys() if name in tgt_names
    ]
    fitted = set()  # names already written via set_var
    pbar = ProgressBar(len(train_vars))
    for name in train_vars:
        x = src_net.get_var(name)
        y = tgt_net.get_var(name)
        src_shape = x.shape
        tgt_shape = y.shape
        if src_shape != tgt_shape:
            assert len(src_shape) == len(
                tgt_shape), "Different shapes: %s %s" % (str(src_shape),
                                                         str(tgt_shape))
            if np.less(
                    tgt_shape,
                    src_shape).any():  # kill labels: [1024,512] => [512,512]
                try:
                    update = x[:tgt_shape[0], :tgt_shape[1],
                               ...]  # !!! corrects only first two dims
                except IndexError:
                    # 1-D variable: there is no second axis to cut.
                    update = x[:tgt_shape[0]]
            elif np.greater(
                    tgt_shape,
                    src_shape).any():  # add labels: [512,512] => [1024,512]
                tile_count = [
                    tgt_shape[i] // src_shape[i] for i in range(len(src_shape))
                ]
                if a.verbose is True:
                    print(name, tile_count, src_shape, '=>', tgt_shape,
                          '\n\n')  # G_mapping/Dense0, D/Output
                update = np.tile(x, tile_count)
            tgt_net.set_var(name, update)
            fitted.add(name)
        pbar.upd(name)
    # Variables whose shapes already matched are copied in a single batch.
    weights_to_copy = {
        tgt_net.vars[name]: src_net.vars[name]
        for name in train_vars if name not in fitted
    }
    tfutil.set_vars(tfutil.run(weights_to_copy))
Example #5
0
def create_from_images(datadir, shuffle=True, size=None):
    """Export every image found under ``datadir`` as an unlabelled dataset.

    Image paths come from ``img_list(datadir, subdir=True)`` in sorted order
    and are written through a ``TFRecordExporter``, optionally in the order
    given by ``tfr.choose_shuffled_order()``.

    Returns:
        Tuple ``(tfr.tfr_file, number_of_images)``.
    """
    assert os.path.isdir(datadir)
    image_paths = sorted(img_list(datadir, subdir=True))
    total = len(image_paths)
    assert total > 0, ' No input images found!'

    # Channel count is taken from the first image only.
    first = np.asarray(PIL.Image.open(image_paths[0]))
    first_shape = first.shape
    if first.ndim == 3:
        channels = first_shape[2]
    else:
        channels = 1
    assert channels in [1, 3, 4], ' Weird color dim: %d' % channels
    print(' Making dataset ..', datadir, first_shape)
    jpg = channels < 4
    if jpg is True:
        print(' Loading JPG as is!')

    with TFRecordExporter(datadir, total) as tfr:
        if shuffle:
            order = tfr.choose_shuffled_order()
        else:
            order = np.arange(total)
        pbar = ProgressBar(order.size)
        for src_idx in order:
            tfr.add_image(image_paths[src_idx], jpg=jpg, size=size)
            pbar.upd()
    return tfr.tfr_file, total
Example #6
0
def generate():
    """Render an animated image sequence from a generator into ``a.out_dir``.

    All settings are read from the module-level ``a`` namespace. The steps are:
    build a latent blending mask (a frame split from ``a.nXY`` or an external
    mask from ``a.latmask``), load the generator from ``<a.model>.pkl``,
    build latent / const-distortion / label timelines, write one image per
    frame, and optionally save the mapped dlatents as ``.npy``.

    Raises:
        AssertionError: ``a.nXY`` does not contain exactly two numbers.
        SystemExit: ``a.latmask`` is given but is neither a file nor a directory.
    """
    os.makedirs(a.out_dir, exist_ok=True)
    np.random.seed(seed=696)
    device = torch.device('cuda')

    # setup generator
    Gs_kwargs = dnnlib.EasyDict()
    Gs_kwargs.verbose = a.verbose
    Gs_kwargs.size = a.size
    Gs_kwargs.scale_type = a.scale_type

    # mask/blend latents with external latmask or by splitting the frame
    if a.latmask is None:
        nHW = [int(s) for s in a.nXY.split('-')][::-1]
        assert len(nHW) == 2, ' Wrong count nXY: %d (must be 2)' % len(nHW)
        n_mult = nHW[0] * nHW[1]
        if a.verbose is True and n_mult > 1:
            print(' Latent blending w/split frame %d x %d' % (nHW[1], nHW[0]))
        lmask = np.tile(np.asarray([[[[1]]]]), (1, n_mult, 1, 1))
        Gs_kwargs.countHW = nHW
        Gs_kwargs.splitfine = a.splitfine
    else:
        if a.verbose is True: print(' Latent blending with mask', a.latmask)
        n_mult = 2
        if os.path.isfile(a.latmask):  # single file
            lmask = np.asarray([[img_read(a.latmask)[:, :, 0] / 255.]
                                ])  # [h,w]
        elif os.path.isdir(a.latmask):  # directory with frame sequence
            lmask = np.asarray([[
                img_read(f)[:, :, 0] / 255. for f in img_list(a.latmask)
            ]])  # [h,w]
        else:
            print(' !! Blending mask not found:', a.latmask)
            exit(1)
        # second channel is the inverse mask
        lmask = np.concatenate((lmask, 1 - lmask), 1)  # [frm,2,h,w]
    lmask = torch.from_numpy(lmask).to(device)

    # load base or custom network
    pkl_name = osp.splitext(a.model)[0]
    if '.pkl' in a.model.lower():
        custom = False
        print(' .. Gs from pkl ..', basename(a.model))
    else:
        custom = True
        print(' .. Gs custom ..', basename(a.model))
    with dnnlib.util.open_url(pkl_name + '.pkl') as f:
        Gs = legacy.load_network_pkl(f,
                                     custom=custom, **Gs_kwargs)['G_ema'].to(
                                         device)  # type: ignore

    if a.verbose is True: print(' out shape', Gs.output_shape[1:])

    if a.verbose is True: print(' making timeline..')
    # one independent latent animation per blended region
    lats = [
        latent_anima((1, Gs.z_dim),
                     a.frames,
                     a.fstep,
                     cubic=a.cubic,
                     gauss=a.gauss,
                     verbose=False)  # [frm,1,512]
        for _ in range(n_mult)
    ]
    latents = np.concatenate(lats, 1)  # [frm,X,512]
    print(' latents', latents.shape)
    latents = torch.from_numpy(latents).to(device)
    frame_count = latents.shape[0]

    # distort image by tweaking initial const layer
    if a.digress > 0:
        # networks without an `init_res` attribute get the default size
        init_res = getattr(Gs, 'init_res', (4, 4))
        dconst = [
            a.digress * latent_anima([1, Gs.z_dim, *init_res],
                                     a.frames,
                                     a.fstep,
                                     cubic=True,
                                     verbose=False)
            for _ in range(n_mult)
        ]
        dconst = np.concatenate(dconst, 1)
    else:
        dconst = np.zeros([frame_count, 1, 1, 1, 1])
    dconst = torch.from_numpy(dconst).to(device)

    # labels / conditions
    label_size = Gs.c_dim
    if label_size > 0:
        labels = torch.zeros((frame_count, n_mult, label_size),
                             device=device)  # [frm,X,lbl]
        if a.labels is None:
            label_ids = []
            for i in range(n_mult):
                label_ids.append(random.randint(0, label_size - 1))
        else:
            label_ids = [int(x) for x in a.labels.split('-')]
            label_ids = label_ids[:n_mult]  # drop labels beyond the region count
        for i, l in enumerate(label_ids):
            labels[:, i, l] = 1
    else:
        labels = [None]

    # generate images from latent timeline
    pbar = ProgressBar(frame_count)
    for i in range(frame_count):

        latent = latents[i]  # [X,512]
        label = labels[i % len(labels)]
        # shorter timelines wrap around (a single mask serves every frame)
        latmask = lmask[i % len(lmask)]  # [X,h,w]
        dc = dconst[i % len(dconst)]  # [X,512,4,4]

        # generate multi-latent result
        if custom:
            output = Gs(latent,
                        label,
                        latmask,
                        dc,
                        truncation_psi=a.trunc,
                        noise_mode='const')
        else:
            output = Gs(latent,
                        label,
                        truncation_psi=a.trunc,
                        noise_mode='const')
        # NCHW -> NHWC, then scale by 127.5, offset by 128 and clamp to uint8
        output = (output.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(
            torch.uint8).cpu().numpy()

        # save image
        ext = 'png' if output.shape[3] == 4 else 'jpg'
        filename = osp.join(a.out_dir, "%06d.%s" % (i, ext))
        imsave(filename, output[0])
        pbar.upd()

    # convert latents to dlatents, save them
    if a.save_lat is True:
        # NOTE(review): `label` is whatever the last frame used, and squeeze(1)
        # only drops the region axis when n_mult == 1 -- confirm intended.
        latents = latents.squeeze(1)  # [frm,512]
        dlatents = Gs.mapping(latents, label)  # [frm,18,512]
        if a.size is None: a.size = [''] * 2
        filename = '{}-{}-{}.npy'.format(basename(a.model), a.size[1],
                                         a.size[0])
        filename = osp.join(osp.dirname(a.out_dir), filename)
        dlatents = dlatents.cpu().numpy()
        np.save(filename, dlatents)
        print('saved dlatents', dlatents.shape, 'to', filename)
Example #7
0
def main():
    """Render an animation that interpolates between saved dlatents.

    Settings come from the module-level ``a`` namespace. Key dlatents are read
    from ``a.dlatents`` (one file, or a directory of ``.npy`` files),
    optionally restyled from ``a.style_dlat`` for layers 5 and above,
    interpolated with ``latent_anima`` and synthesized frame by frame into
    ``a.out_dir``.

    Raises:
        SystemExit: with status 1 when ``a.dlatents`` is neither an existing
            file nor an existing directory.
    """
    os.makedirs(a.out_dir, exist_ok=True)
    device = torch.device('cuda')

    # setup generator
    Gs_kwargs = dnnlib.EasyDict()
    Gs_kwargs.verbose = a.verbose
    Gs_kwargs.size = a.size
    Gs_kwargs.scale_type = a.scale_type

    # load base or custom network
    pkl_name = osp.splitext(a.model)[0]
    if '.pkl' in a.model.lower():
        custom = False
        print(' .. Gs from pkl ..', basename(a.model))
    else:
        custom = True
        print(' .. Gs custom ..', basename(a.model))
    with dnnlib.util.open_url(pkl_name + '.pkl') as f:
        Gs = legacy.load_network_pkl(f,
                                     custom=custom, **Gs_kwargs)['G_ema'].to(
                                         device)  # type: ignore

    dlat_shape = (1, Gs.num_ws, Gs.w_dim)  # [1,18,512]

    # read saved latents
    if a.dlatents is not None and osp.isfile(a.dlatents):
        key_dlatents = load_latents(a.dlatents)
        if len(key_dlatents.shape) == 2:
            key_dlatents = np.expand_dims(key_dlatents, 0)
    elif a.dlatents is not None and osp.isdir(a.dlatents):
        key_dlatents = []
        npy_list = file_list(a.dlatents, 'npy')
        for npy in npy_list:
            key_dlatent = load_latents(npy)
            if len(key_dlatent.shape) == 2:
                key_dlatent = np.expand_dims(key_dlatent, 0)
            key_dlatents.append(key_dlatent)
        key_dlatents = np.concatenate(key_dlatents)  # [frm,18,512]
    else:
        print(' No input dlatents found')
        exit(1)  # report failure to the caller, not success
    key_dlatents = key_dlatents[:, np.newaxis]  # [frm,1,18,512]
    print(' key dlatents', key_dlatents.shape)

    # replace higher layers with single (style) latent
    if a.style_dlat is not None:
        print(' styling with dlatent', a.style_dlat)
        style_dlatent = load_latents(a.style_dlat)
        while len(style_dlatent.shape) < 4:
            style_dlatent = np.expand_dims(style_dlatent, 0)
        # try replacing 5 by other value, less than Gs.num_ws
        style_layers = range(5, Gs.num_ws)
        key_dlatents[:, :, style_layers, :] = style_dlatent[:, :,
                                                            style_layers, :]

    frames = key_dlatents.shape[0] * a.fstep

    dlatents = latent_anima(dlat_shape,
                            frames,
                            a.fstep,
                            key_latents=key_dlatents,
                            cubic=a.cubic,
                            verbose=True)  # [frm,1,512]
    print(' dlatents', dlatents.shape)
    frame_count = dlatents.shape[0]
    dlatents = torch.from_numpy(dlatents).to(device)

    # distort image by tweaking initial const layer
    if a.digress > 0:
        # networks without an `init_res` attribute get the default size
        init_res = getattr(Gs, 'init_res', (4, 4))
        dconst = a.digress * latent_anima([1, Gs.z_dim, *init_res],
                                          frame_count,
                                          a.fstep,
                                          cubic=True,
                                          verbose=False)
    else:
        dconst = np.zeros([frame_count, 1, 1, 1, 1])
    dconst = torch.from_numpy(dconst).to(device)

    # generate images from latent timeline
    pbar = ProgressBar(frame_count)
    for i in range(frame_count):

        # generate multi-latent result
        if custom:
            output = Gs.synthesis(dlatents[i],
                                  None,
                                  dconst[i],
                                  noise_mode='const')
        else:
            output = Gs.synthesis(dlatents[i], noise_mode='const')
        # NCHW -> NHWC, then scale by 127.5, offset by 128 and clamp to uint8
        output = (output.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(
            torch.uint8).cpu().numpy()

        ext = 'png' if output.shape[3] == 4 else 'jpg'
        filename = osp.join(a.out_dir, "%06d.%s" % (i, ext))
        imsave(filename, output[0])
        pbar.upd()
Example #8
0
def update_G(src_dict, tgt_net, size, n_mlp):
    """Transfer generator weights from ``src_dict`` into ``tgt_net``.

    Source keys (``style.N``, ``input.input``, ``to_rgb1``, ``to_rgbs.N``,
    ``conv1``, ``convs.N``, ``noises.noise_N``) are mapped onto the
    ``G_mapping/...`` and ``G_synthesis/...`` variables of ``tgt_net`` using
    the ``convert_*`` / ``update`` helpers.

    Args:
        src_dict: Mapping from source parameter names to their values.
        tgt_net: Network receiving the converted weights.
        size: Output resolution; its base-2 logarithm fixes the layer count.
        n_mlp: Number of mapping layers to convert.
    """
    # math.log2 is exact for powers of two; math.log(size, 2) is a quotient
    # of two floats and can be off by one ulp before truncation.
    log_size = int(math.log2(size))
    n_up = log_size - 2  # resolutions above 4x4
    n_noise = n_up * 2 + 1

    # Total matches the pbar.upd() calls below: mapping layers, two 4x4
    # steps, one ToRGB and one conv pair per resolution, and noise buffers.
    pbar = ProgressBar(n_mlp + n_up + n_up + n_noise + 2)
    for i in range(n_mlp):
        convert_dense(tgt_net, src_dict, f"G_mapping/Dense{i}", f"style.{i+1}")
        pbar.upd()
    update(tgt_net, "G_synthesis/4x4/Const/const", src_dict["input.input"])
    convert_torgb(tgt_net, src_dict, "G_synthesis/4x4/ToRGB", "to_rgb1")
    pbar.upd()

    for i in range(n_up):
        reso = 4 * 2**(i + 1)
        convert_torgb(tgt_net, src_dict, f"G_synthesis/{reso}x{reso}/ToRGB",
                      f"to_rgbs.{i}")
        pbar.upd()
    convert_modconv(tgt_net, src_dict, "G_synthesis/4x4/Conv", "conv1")
    pbar.upd()

    # Each resolution owns two consecutive source convs: upsampling, then plain.
    for i in range(n_up):
        reso = 4 * 2**(i + 1)
        conv_i = 2 * i
        convert_modconv(tgt_net,
                        src_dict,
                        f"G_synthesis/{reso}x{reso}/Conv0_up",
                        f"convs.{conv_i}",
                        flip=True)
        convert_modconv(tgt_net, src_dict, f"G_synthesis/{reso}x{reso}/Conv1",
                        f"convs.{conv_i + 1}")
        pbar.upd()

    for i in range(n_noise):
        update(tgt_net, f"G_synthesis/noise{i}", src_dict[f"noises.noise_{i}"])
        pbar.upd()
Example #9
0
def main():
    """Render an animated image sequence with the TensorFlow generator.

    All settings are read from the module-level ``a`` namespace. The steps are:
    build a latent blending mask (a frame split from ``a.nXY`` or an external
    mask from ``a.latmask``), unpickle the network from ``<a.model>.pkl``
    (rebuilding it with custom kwargs unless ``a.model`` names a ``.pkl``),
    build latent / const-distortion / label timelines, write one image per
    frame into ``a.out_dir`` and optionally save the mapped dlatents.

    Raises:
        AssertionError: ``a.nXY`` does not contain exactly two numbers.
        SystemExit: ``a.latmask`` is given but is neither a file nor a directory.
    """
    os.makedirs(a.out_dir, exist_ok=True)
    np.random.seed(seed=696)

    # setup generator
    fmt = dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True)
    Gs_kwargs = dnnlib.EasyDict()
    Gs_kwargs.func_name = 'training.stylegan2_multi.G_main'
    Gs_kwargs.verbose = a.verbose
    Gs_kwargs.size = a.size
    Gs_kwargs.scale_type = a.scale_type
    Gs_kwargs.impl = a.ops

    # mask/blend latents with external latmask or by splitting the frame
    if a.latmask is None:
        nHW = [int(s) for s in a.nXY.split('-')][::-1]
        assert len(nHW)==2, ' Wrong count nXY: %d (must be 2)' % len(nHW)
        n_mult = nHW[0] * nHW[1]
        if a.verbose is True and n_mult > 1: print(' Latent blending w/split frame %d x %d' % (nHW[1], nHW[0]))
        lmask = np.tile(np.asarray([[[[None]]]]), (1,n_mult,1,1))
        Gs_kwargs.countHW = nHW
        Gs_kwargs.splitfine = a.splitfine
    else:
        if a.verbose is True: print(' Latent blending with mask', a.latmask)
        n_mult = 2
        if os.path.isfile(a.latmask): # single file
            lmask = np.asarray([[img_read(a.latmask)[:,:,0] / 255.]]) # [h,w]
        elif os.path.isdir(a.latmask): # directory with frame sequence
            lmask = np.asarray([[img_read(f)[:,:,0] / 255. for f in img_list(a.latmask)]]) # [h,w]
        else:
            print(' !! Blending mask not found:', a.latmask); exit(1)
        # second channel is the inverse mask
        lmask = np.concatenate((lmask, 1 - lmask), 1) # [frm,2,h,w]
        Gs_kwargs.latmask_res = lmask.shape[2:]

    # load model with arguments
    sess = tflib.init_tf({'allow_soft_placement':True})
    pkl_name = osp.splitext(a.model)[0]
    # SECURITY: pickle.load executes code embedded in the file -- only load
    # model files from trusted sources.
    with open(pkl_name + '.pkl', 'rb') as file:
        network = pickle.load(file, encoding='latin1')
    try:
        # a 3-item pickle: keep only the last network
        _, _, network = network
    except (TypeError, ValueError):
        pass  # the pickle holds a single network object
    for k in list(network.static_kwargs.keys()):
        Gs_kwargs[k] = network.static_kwargs[k]

    # reload custom network, if needed
    if '.pkl' in a.model.lower():
        print(' .. Gs from pkl ..', basename(a.model))
        Gs = network
    else: # reconstruct network
        print(' .. Gs custom ..', basename(a.model))
        Gs = tflib.Network('Gs', **Gs_kwargs)
        Gs.copy_vars_from(network)
    if a.verbose is True: print('kwargs:', ['%s: %s'%(kv[0],kv[1]) for kv in sorted(Gs.static_kwargs.items())])

    if a.verbose is True: print(' out shape', Gs.output_shape[1:])
    if a.size is None: a.size = Gs.output_shape[2:]

    if a.verbose is True: print(' making timeline..')
    # one independent latent animation per blended region
    lats = [
        latent_anima((1, Gs.input_shape[1]), a.frames, a.fstep, cubic=a.cubic, gauss=a.gauss, verbose=False) # [frm,1,512]
        for _ in range(n_mult)
    ]
    latents = np.concatenate(lats, 1) # [frm,X,512]
    print(' latents', latents.shape)
    frame_count = latents.shape[0]

    # distort image by tweaking initial const layer
    if a.digress > 0:
        try:
            latent_size = Gs.static_kwargs['latent_size']
        except KeyError:
            latent_size = 512 # default latent size
        try:
            init_res = Gs.static_kwargs['init_res']
        except KeyError:
            init_res = (4,4) # default initial layer size
        dconst = [
            a.digress * latent_anima([1, latent_size, *init_res], a.frames, a.fstep, cubic=True, verbose=False)
            for _ in range(n_mult)
        ]
        dconst = np.concatenate(dconst, 1)
    else:
        dconst = np.zeros([frame_count, 1, 1, 1, 1])

    # labels / conditions
    label_size = getattr(Gs_kwargs, 'label_size', 0)
    if label_size > 0:
        labels = np.zeros((frame_count, n_mult, label_size)) # [frm,X,lbl]
        if a.labels is None:
            label_ids = []
            for i in range(n_mult):
                label_ids.append(random.randint(0, label_size-1))
        else:
            label_ids = [int(x) for x in a.labels.split('-')]
            label_ids = label_ids[:n_mult] # drop labels beyond the region count
        for i, l in enumerate(label_ids):
            labels[:,i,l] = 1
    else:
        labels = [None]

    # generate images from latent timeline
    pbar = ProgressBar(frame_count)
    for i in range(frame_count):

        latent  = latents[i] # [X,512]
        label   = labels[i % len(labels)]
        # shorter timelines wrap around (a single mask serves every frame)
        latmask = lmask[i % len(lmask)] # [X,h,w]
        dc      = dconst[i % len(dconst)] # [X,512,4,4]

        # generate multi-latent result
        if Gs.num_inputs == 2:
            output = Gs.run(latent, label, truncation_psi=a.trunc, randomize_noise=False, output_transform=fmt)
        else:
            output = Gs.run(latent, label, latmask, dc, truncation_psi=a.trunc, randomize_noise=False, output_transform=fmt)

        # save image
        ext = 'png' if output.shape[3]==4 else 'jpg'
        filename = osp.join(a.out_dir, "%06d.%s" % (i,ext))
        imsave(filename, output[0])
        pbar.upd()

    # convert latents to dlatents, save them
    if a.save_lat is True:
        # NOTE(review): `label` is whatever the last frame used, and squeeze(1)
        # only drops the region axis when n_mult == 1 -- confirm intended.
        latents = latents.squeeze(1) # [frm,512]
        dlatents = Gs.components.mapping.run(latents, label, dtype='float16') # [frm,18,512]
        filename = '{}-{}-{}.npy'.format(basename(a.model), a.size[1], a.size[0])
        filename = osp.join(osp.dirname(a.out_dir), filename)
        np.save(filename, dlatents)
        print('saved dlatents', dlatents.shape, 'to', filename)
Example #10
0
def main():
    """Render one looping animation per latent direction (PyTorch generator).

    Settings come from the module-level ``a`` namespace. Direction vectors are
    loaded from the ``.npy`` files in ``a.vector_dir``; the base latent is
    read from ``a.base_lat`` or drawn at random. ``make_loop`` is then called
    once per direction. Sets the module globals ``Gs``, ``custom`` and
    (conditionally) ``use_d``.

    Raises:
        SystemExit: with status 1 when ``a.vector_dir`` is not given.
    """
    if a.vector_dir is not None:
        # drop a single trailing path separator
        if a.vector_dir.endswith('/') or a.vector_dir.endswith('\\'):
            a.vector_dir = a.vector_dir[:-1]
    os.makedirs(a.out_dir, exist_ok=True)
    device = torch.device('cuda')

    global Gs, use_d, custom

    # setup generator
    Gs_kwargs = dnnlib.EasyDict()
    Gs_kwargs.verbose = a.verbose
    Gs_kwargs.size = a.size
    Gs_kwargs.scale_type = a.scale_type

    # load base or custom network
    pkl_name = osp.splitext(a.model)[0]
    if '.pkl' in a.model.lower():
        custom = False
        print(' .. Gs from pkl ..', basename(a.model))
    else:
        custom = True
        print(' .. Gs custom ..', basename(a.model))
    with dnnlib.util.open_url(pkl_name + '.pkl') as f:
        Gs = legacy.load_network_pkl(f,
                                     custom=custom, **Gs_kwargs)['G_ema'].to(
                                         device)  # type: ignore

    # load directions
    if a.vector_dir is not None:
        directions = []
        vector_list = file_list(a.vector_dir, 'npy')
        for v in vector_list:
            direction = load_latents(v)
            if len(direction.shape) == 2:
                direction = np.expand_dims(direction, 0)
            directions.append(direction)
        directions = np.concatenate(directions)[:,
                                                np.newaxis]  # [frm,1,18,512]
    else:
        print(' No vectors found')
        exit(1)  # report failure to the caller, not success

    # The last loaded vector decides the latent space for all directions.
    # NOTE(review): `use_d` is only ever set to True here; it must already
    # exist at module level for the other case -- confirm.
    if len(direction[0].shape) > 1 and direction[0].shape[0] > 1:
        use_d = True
    print(' directions', directions.shape, 'using d' if use_d else 'using w')
    directions = torch.from_numpy(directions).to(device)

    # latent direction range
    lrange = [-0.5, 0.5]

    # load saved latents
    if a.base_lat is not None:
        base_latent = load_latents(a.base_lat)
        base_latent = torch.from_numpy(base_latent).to(device)
    else:
        print(' No NPY input given, making random')
        # Same tensor/device conversion as the loaded latent, so that
        # Gs.mapping and make_loop receive a tensor in both cases.
        base_latent = torch.from_numpy(np.random.randn(1,
                                                       Gs.z_dim)).to(device)
        if use_d:
            base_latent = Gs.mapping(base_latent, None)  # [frm,18,512]

    pbar = ProgressBar(len(directions))
    for i, direction in enumerate(directions):
        make_loop(base_latent, direction, lrange, a.fstep * 2, a.fstep * 2 * i)
        pbar.upd()
Example #11
0
def main():
    """Render one looping animation per latent direction (TensorFlow generator).

    Settings come from the module-level ``a`` namespace. The network is
    unpickled from ``<a.model>.pkl`` (and rebuilt with custom kwargs unless
    ``a.model`` names a ``.pkl``), direction vectors are loaded from the
    ``.npy`` files in ``a.vector_dir``, and ``make_loop`` is called once per
    direction. Sets the module globals ``Gs`` and (conditionally) ``use_d``.

    Raises:
        SystemExit: with status 1 when ``a.vector_dir`` is not given.
    """
    if a.vector_dir is not None:
        # drop a single trailing path separator
        if a.vector_dir.endswith('/') or a.vector_dir.endswith('\\'):
            a.vector_dir = a.vector_dir[:-1]
    os.makedirs(a.out_dir, exist_ok=True)

    global Gs, use_d

    # setup generator
    Gs_kwargs = dnnlib.EasyDict()
    Gs_kwargs.func_name = 'training.stylegan2_multi.G_main'
    Gs_kwargs.verbose = a.verbose
    Gs_kwargs.size = a.size
    Gs_kwargs.scale_type = a.scale_type
    Gs_kwargs.impl = a.ops

    # load model with arguments
    sess = tflib.init_tf({'allow_soft_placement': True})
    pkl_name = osp.splitext(a.model)[0]
    # SECURITY: pickle.load executes code embedded in the file -- only load
    # model files from trusted sources.
    with open(pkl_name + '.pkl', 'rb') as file:
        network = pickle.load(file, encoding='latin1')
    try:
        # a 3-item pickle: keep only the last network
        _, _, network = network
    except (TypeError, ValueError):
        pass  # the pickle holds a single network object
    for k in list(network.static_kwargs.keys()):
        Gs_kwargs[k] = network.static_kwargs[k]

    # reload custom network, if needed
    if '.pkl' in a.model.lower():
        print(' .. Gs from pkl ..', basename(a.model))
        Gs = network
    else:  # reconstruct network
        print(' .. Gs custom ..', basename(a.model))
        Gs = tflib.Network('Gs', **Gs_kwargs)
        Gs.copy_vars_from(network)

    # load directions
    if a.vector_dir is not None:
        directions = []
        vector_list = file_list(a.vector_dir, 'npy')
        for v in vector_list:
            direction = load_latents(v)
            if len(direction.shape) == 2:
                direction = np.expand_dims(direction, 0)
            directions.append(direction)
        directions = np.concatenate(directions)[:,
                                                np.newaxis]  # [frm,1,18,512]
    else:
        print(' No vectors found')
        exit(1)  # report failure to the caller, not success

    # The last loaded vector decides the latent space for all directions.
    # NOTE(review): `use_d` is only ever set to True here; it must already
    # exist at module level for the other case -- confirm.
    if len(direction[0].shape) > 1 and direction[0].shape[0] > 1:
        use_d = True
    print(' directions', directions.shape, 'using d' if use_d else 'using w')

    # latent direction range
    lrange = [-0.5, 0.5]

    # load saved latents
    if a.base_lat is not None:
        base_latent = load_latents(a.base_lat)
    else:
        print(' No NPY input given, making random')
        z_dim = Gs.input_shape[1]
        base_latent = np.random.randn(1, z_dim)
        if use_d:
            base_latent = Gs.components.mapping.run(base_latent,
                                                    None)  # [frm,18,512]

    pbar = ProgressBar(len(directions))
    for i, direction in enumerate(directions):
        make_loop(base_latent, direction, lrange, a.fstep * 2, a.fstep * 2 * i)
        pbar.upd()
Example #12
0
def project(
        G,
        target: torch.Tensor,  # [C,H,W] and dynamic range [0,255], W & H must match G output resolution
        *,
        num_steps=1000,
        w_avg_samples=10000,
        initial_learning_rate=0.1,
        initial_noise_factor=0.05,
        lr_rampdown_length=0.25,
        lr_rampup_length=0.05,
        noise_ramp_length=0.75,
        regularize_noise_weight=1e5,
        verbose=False,
        device: torch.device):
    """Optimise a W vector so that ``G`` reproduces ``target``.

    A single W (started at the mean of ``w_avg_samples`` mapped random
    latents) and the ``noise_const`` buffers of ``G.synthesis`` are optimised
    with Adam against the summed squared difference of VGG16 features, plus a
    noise regularisation term weighted by ``regularize_noise_weight``.

    Args:
        G: Generator; a deep copy in eval mode is used.
        target: Image tensor of shape
            ``(G.img_channels, G.img_resolution, G.img_resolution)``.
        num_steps: Number of optimisation steps.
        w_avg_samples: Number of random latents used for the W statistics.
        initial_learning_rate: Peak learning rate of the schedule.
        initial_noise_factor: Scale of the noise added to W, relative to the
            measured W standard deviation.
        lr_rampdown_length: Fraction of the run over which the rate ramps down.
        lr_rampup_length: Fraction of the run over which the rate ramps up.
        noise_ramp_length: Fraction of the run over which the W noise fades.
        regularize_noise_weight: Weight of the noise regularisation loss.
        verbose: Currently unused.
        device: Device on which everything is run.

    Returns:
        Tensor with the W of every step, repeated ``G.mapping.num_ws`` times
        along axis 1.
    """
    expected_shape = (G.img_channels, G.img_resolution, G.img_resolution)
    assert target.shape == expected_shape

    G = copy.deepcopy(G).eval().requires_grad_(False).to(
        device)  # type: ignore

    # W statistics from a fixed-seed batch of random latents.
    rng = np.random.RandomState(123)
    z_samples = rng.randn(w_avg_samples, G.z_dim)
    w_samples = G.mapping(torch.from_numpy(z_samples).to(device),
                          None)  # [N, L, C]
    w_samples = w_samples[:, :1, :].cpu().numpy().astype(
        np.float32)  # [N, 1, C]
    w_avg = np.mean(w_samples, axis=0, keepdims=True)  # [1, 1, C]
    w_std = (np.sum((w_samples - w_avg)**2) / w_avg_samples)**0.5

    # Noise buffers that are optimised together with W.
    noise_bufs = {
        buf_name: buf
        for buf_name, buf in G.synthesis.named_buffers()
        if 'noise_const' in buf_name
    }

    # VGG16 feature detector: local copy if present with the expected size,
    # otherwise the published URL.
    vgg_file = 'models/vgg/vgg16.pt'
    if os.path.isfile(vgg_file) and os.stat(vgg_file).st_size == 553469545:
        vgg_source = vgg_file
    else:
        vgg_source = 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/vgg16.pt'
    with dnnlib.util.open_url(vgg_source) as file:
        vgg16 = torch.jit.load(file).eval().to(device)

    # Target features, computed once.
    target_images = target.unsqueeze(0).to(device).to(torch.float32)
    if target_images.shape[2] > 256:
        target_images = F.interpolate(target_images,
                                      size=(256, 256),
                                      mode='area')
    target_features = vgg16(target_images,
                            resize_images=False,
                            return_lpips=True)

    w_opt = torch.tensor(w_avg,
                         dtype=torch.float32,
                         device=device,
                         requires_grad=True)  # pylint: disable=not-callable
    w_out = torch.zeros([num_steps] + list(w_opt.shape[1:]),
                        dtype=torch.float32,
                        device=device)
    optimizer = torch.optim.Adam([w_opt] + list(noise_bufs.values()),
                                 betas=(0.9, 0.999),
                                 lr=initial_learning_rate)

    # Random start for the noise buffers, which now receive gradients.
    for buf in noise_bufs.values():
        buf[:] = torch.randn_like(buf)
        buf.requires_grad = True

    pbar = ProgressBar(num_steps)
    for step in range(num_steps):
        # Schedule: W noise fades quadratically; the learning rate ramps up
        # linearly and down along a cosine.
        t = step / num_steps
        w_noise_scale = w_std * initial_noise_factor * max(
            0.0, 1.0 - t / noise_ramp_length)**2
        lr_ramp = min(1.0, (1.0 - t) / lr_rampdown_length)
        lr_ramp = 0.5 - 0.5 * np.cos(lr_ramp * np.pi)
        lr_ramp = lr_ramp * min(1.0, t / lr_rampup_length)
        lr = initial_learning_rate * lr_ramp
        for group in optimizer.param_groups:
            group['lr'] = lr

        # Synthesize from the noisy W, broadcast to every layer.
        w_noise = torch.randn_like(w_opt) * w_noise_scale
        ws = (w_opt + w_noise).repeat([1, G.mapping.num_ws, 1])
        synth_images = G.synthesis(ws, noise_mode='const')

        # Rescale, and shrink to 256x256 if larger.
        synth_images = (synth_images + 1) * (255 / 2)
        if synth_images.shape[2] > 256:
            synth_images = F.interpolate(synth_images,
                                         size=(256, 256),
                                         mode='area')

        synth_features = vgg16(synth_images,
                               resize_images=False,
                               return_lpips=True)
        dist = (target_features - synth_features).square().sum()

        # Noise regularisation: correlation with the one-pixel shifted copy,
        # accumulated while halving the resolution until height is 8 or less.
        reg_loss = 0.0
        for buf in noise_bufs.values():
            noise = buf[None, None, :, :]  # must be [1,1,H,W] for F.avg_pool2d()
            while True:
                for axis in (3, 2):
                    shifted = torch.roll(noise, shifts=1, dims=axis)
                    reg_loss += (noise * shifted).mean()**2
                if noise.shape[2] <= 8:
                    break
                noise = F.avg_pool2d(noise, kernel_size=2)
        loss = dist + reg_loss * regularize_noise_weight

        optimizer.zero_grad(set_to_none=True)
        loss.backward()
        optimizer.step()

        # Record the W reached after this step.
        w_out[step] = w_opt.detach()[0]

        # Renormalise every noise buffer to zero mean and unit mean square.
        with torch.no_grad():
            for buf in noise_bufs.values():
                buf -= buf.mean()
                buf *= buf.square().mean().rsqrt()
        pbar.upd()

    return w_out.repeat([1, G.mapping.num_ws, 1])
Example #13
0
def main():
    """Render a frame sequence by animating saved dlatents through a generator.

    All settings are read from the module-level arguments object ``a``:
    ``out_dir``, ``model``, ``dlatents``, ``style_dlat``, ``fstep``, ``cubic``,
    ``trunc``, ``digress``, plus ``verbose``, ``size``, ``scale_type`` and
    ``ops``, which are forwarded as generator kwargs.

    Steps:
      1. Load the pickled network stored next to ``a.model`` (extension
         replaced with ``.pkl``) and, unless ``a.model`` itself names a pkl,
         rebuild a custom ``Gs`` network and copy the variables into it.
      2. Load key dlatents from a single file or from every ``npy`` file in a
         directory, optionally overwriting layers 5.. with a style dlatent.
      3. Build the animated dlatent timeline with ``latent_anima``, apply the
         truncation trick to the first (up to) 8 layers, and optionally build
         a perturbation sequence for the initial const layer.
      4. Run the synthesis network frame by frame and save each image into
         ``a.out_dir`` as ``%06d.png`` (4 channels) or ``%06d.jpg``.

    Raises:
        SystemExit: with status 1 if the model kwargs contain no
            ``resolution``; with status 0 (as before) if no input dlatents
            could be found.
    """
    os.makedirs(a.out_dir, exist_ok=True)

    # setup generator
    fmt = dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True)
    Gs_kwargs = dnnlib.EasyDict()
    Gs_kwargs.func_name = 'training.stylegan2_multi.G_main'
    Gs_kwargs.verbose = a.verbose
    Gs_kwargs.size = a.size
    Gs_kwargs.scale_type = a.scale_type
    Gs_kwargs.impl = a.ops

    # load model with arguments
    sess = tflib.init_tf({'allow_soft_placement': True})
    pkl_name = osp.splitext(a.model)[0]
    # NOTE(security): unpickling executes arbitrary code embedded in the file;
    # only load model files from trusted sources.
    with open(pkl_name + '.pkl', 'rb') as file:
        network = pickle.load(file, encoding='latin1')
    try:
        # a 3-item pickle: keep only the last item
        _, _, network = network
    except (TypeError, ValueError):
        # not unpackable into three items: use the loaded object as is
        pass
    for k, v in network.static_kwargs.items():
        Gs_kwargs[k] = v

    # reload custom network, if needed
    if '.pkl' in a.model.lower():
        print(' .. Gs from pkl ..', basename(a.model))
        Gs = network
    else:  # reconstruct network
        print(' .. Gs custom ..', basename(a.model))
        Gs = tflib.Network('Gs', **Gs_kwargs)
        Gs.copy_vars_from(network)

    z_dim = Gs.input_shape[1]
    dz_dim = 512  # dlatent_size
    resolution = Gs_kwargs.get('resolution')
    if resolution is None:
        print(' Resave model, no resolution kwarg found!')
        raise SystemExit(1)
    dl_dim = 2 * (int(np.floor(np.log2(resolution))) - 1)
    dlat_shape = (1, dl_dim, dz_dim)  # [1,18,512]

    # read saved latents: either one file or every npy file of a directory
    if a.dlatents is not None and osp.isfile(a.dlatents):
        npy_list = [a.dlatents]
    elif a.dlatents is not None and osp.isdir(a.dlatents):
        npy_list = file_list(a.dlatents, 'npy')
    else:
        npy_list = []
    if len(npy_list) == 0:
        # also covers an existing directory without any npy files, which
        # would otherwise fail inside np.concatenate
        print(' No input dlatents found')
        raise SystemExit
    key_dlatents = []
    for npy in npy_list:
        key_dlatent = load_latents(npy)
        if len(key_dlatent.shape) == 2:
            key_dlatent = np.expand_dims(key_dlatent, 0)
        key_dlatents.append(key_dlatent)
    key_dlatents = np.concatenate(key_dlatents)  # [frm,18,512]
    key_dlatents = key_dlatents[:, np.newaxis]  # [frm,1,18,512]
    print(' key dlatents', key_dlatents.shape)

    # replace higher layers with single (style) latent
    if a.style_dlat is not None:
        print(' styling with dlatent', a.style_dlat)
        style_dlatent = load_latents(a.style_dlat)
        while len(style_dlatent.shape) < 4:
            style_dlatent = np.expand_dims(style_dlatent, 0)
        # try replacing 5 by other value, less than dl_dim
        style_layers = range(5, dl_dim)
        key_dlatents[:, :, style_layers, :] = \
            style_dlatent[:, :, style_layers, :]

    frames = key_dlatents.shape[0] * a.fstep

    # presumably [frm,1,dl_dim,512]; it is indexed as a 4-D array below
    dlatents = latent_anima(dlat_shape,
                            frames,
                            a.fstep,
                            key_latents=key_dlatents,
                            cubic=a.cubic,
                            verbose=True)
    print(' dlatents', dlatents.shape)
    frame_count = dlatents.shape[0]

    # truncation trick on the first 8 layers (fewer if the model has fewer)
    dlatent_avg = Gs.get_var('dlatent_avg')  # (512,)
    tr_range = range(min(8, dlatents.shape[2]))
    dlatents[:, :, tr_range, :] = dlatent_avg + (dlatents[:, :, tr_range, :] -
                                                 dlatent_avg) * a.trunc

    # distort image by tweaking initial const layer
    if a.digress > 0:
        # fall back to the default latent size / initial layer size
        latent_size = Gs.static_kwargs.get('latent_size', 512)
        init_res = Gs.static_kwargs.get('init_res', (4, 4))
        dconst = a.digress * latent_anima([1, latent_size, *init_res],
                                          frames,
                                          a.fstep,
                                          cubic=True,
                                          verbose=False)
    else:
        dconst = np.zeros([frame_count, 1, 1, 1, 1])

    # generate images from latent timeline
    pbar = ProgressBar(frame_count)
    for i in range(frame_count):

        # generate multi-latent result
        if Gs.num_inputs == 2:
            output = Gs.components.synthesis.run(dlatents[i],
                                                 randomize_noise=False,
                                                 output_transform=fmt,
                                                 minibatch_size=1)
        else:
            output = Gs.components.synthesis.run(dlatents[i], [None],
                                                 dconst[i],
                                                 randomize_noise=False,
                                                 output_transform=fmt,
                                                 minibatch_size=1)

        # 4-channel output is saved as png, anything else as jpg
        ext = 'png' if output.shape[3] == 4 else 'jpg'
        filename = osp.join(a.out_dir, "%06d.%s" % (i, ext))
        imsave(filename, output[0])
        pbar.upd()