def __getitem__(self, index):
        """Sample a noise vector z and return the generated image with it.

        Args:
            index: dataset index (unused — every item is an independent
                random sample of z).

        Returns:
            (image, noise): `image` is the generator output rescaled to
            [0, 1]; `noise` is the z vector that produced it. Both are
            returned on CPU.
        """
        # Sample z
        noise = util.get_noise(self.args)

        # Generate output - do this on GPU
        self.model = self.model.cuda()
        noise = noise.cuda()
        if 'BigGAN' in self.model_name:
            # BigGAN is class-conditional: condition on a fixed ImageNet
            # class id (207). NOTE(review): hard-coded class — confirm this
            # is intended for all samples.
            # BUG FIX: the published package's module is
            # `pytorch_pretrained_biggan`, not `torch_pretrained_biggan`.
            from pytorch_pretrained_biggan import one_hot_from_int
            class_vector = one_hot_from_int(
                207, batch_size=1
            )  # TODO: check if batch size 1 makes sense for single getitem
            class_vector = torch.from_numpy(class_vector)
            class_vector = class_vector.cuda()

            # BUG FIX: `args` was an undefined name here; the method reads
            # configuration from `self.args` (as on the first line above).
            image = self.model.forward(noise, class_vector,
                                       self.args.truncation).float()
        else:
            image = self.model.forward(noise).float()

        # Take off GPU so the dataset does not hold GPU memory between items
        self.model = self.model.cpu()
        noise = noise.cpu()

        # Normalize image from the generator's [-1, 1] range to [0, 1]
        image = (image + 1.) / 2.

        # Return pair
        return image, noise
# Exemple #2
# (score: 0)
def _load_augmented_wav(path):
    """Read *path* as a wav file and apply the random augmentation pipeline.

    Returns the augmented sample array (noise added, time-shifted, padded).
    """
    # BUG FIX: wav files are binary — the original used text-mode open(),
    # which fails/corrupts on Python 3 — and it leaked the file handle.
    with open(path, 'rb') as fh:
        wav = io.BytesIO(fh.read())
    samples = scipy.io.wavfile.read(wav)[1]

    samples = util.tf_random_add_noise_transform(samples, noise_chunks)
    samples = util.tf_random_time_shift_transform(samples)
    samples = util.tf_random_pad_transform(samples)
    samples = util.tf_fix_pad_transform(samples)
    return samples


def get_minibatch(batch_size, silence_percent=0.20, unknown_percent=0.20):
    """Sample one random minibatch of MFCC features and one-hot labels.

    Each row is randomly one of: silence (~silence_percent of rows), an
    'unknown' word (~unknown_percent), or a labelled training word.

    Args:
        batch_size: number of rows in the batch.
        silence_percent: approximate fraction of silence rows.
        unknown_percent: approximate fraction of 'unknown' rows.

    Returns:
        (res, y): res is a (batch_size, w, h) feature array; y is a
        (batch_size, len(labels) + 2) one-hot target array, where the two
        extra columns are 'silence' and 'unknown'.
    """
    res = np.zeros((batch_size, w, h))
    y = np.zeros((batch_size, len(labels) + 2))
    for i in range(batch_size):
        if random.choice(range(int(1 / silence_percent))) == 0:
            # Silence: a random chunk of background noise.
            chunk_byte = util.get_noise(noise_chunks)
            res[i, :] = audiofile_to_input_vector(chunk_byte, fs, numcep,
                                                  numcontext)
            y[i, all_labels.index('silence')] = 1.0  # silence
        elif random.choice(range(int(1 / unknown_percent))) == 0:
            vv = _load_augmented_wav(random.choice(unknown_files))

            # BUG FIX: the original passed the raw samples (v[1]) here,
            # silently discarding the augmentation computed above — the
            # parallel branch below correctly uses the augmented `vv`.
            mfcca = audiofile_to_input_vector(adj_volume(vv), fs, numcep,
                                              numcontext)
            res[i, 0:mfcca.shape[0], 0:mfcca.shape[1]] = mfcca
            y[i, all_labels.index('unknown')] = 1.0  # unknown
        else:
            f = random.choice(train_files)
            vv = _load_augmented_wav(f)

            mfcca = audiofile_to_input_vector(adj_volume(vv), fs, numcep,
                                              numcontext)
            res[i, 0:mfcca.shape[0], 0:mfcca.shape[1]] = mfcca
            # The label is the parent directory name in the file path.
            label = re.findall(".*/(.*?)/.*?.wav", f)[0]
            y[i, labels.index(label)] = 1.0

    return res, y
# Exemple #3
# (score: 0)
    def __init__(self, conf, lr, device=torch.device('cuda')):
        """Set up the DIP image generator and the configured kernel prior.

        Args:
            conf: experiment configuration; reads sf (scale factor), model
                ('DIPFKP' | 'DIPSoftmax' | 'DoubleDIP'), dip_lr, kp_lr,
                path_KP, and input_image_path.
            lr: low-resolution input tensor; unpacked below as (_, C, H, W),
                so a 4-D (batch, channel, height, width) tensor is expected.
            device: torch device for all modules/tensors (default CUDA).
        """
        # Acquire configuration
        self.conf = conf
        self.lr = lr
        self.sf = conf.sf
        # Kernel width grows with the scale factor but is capped at 21.
        self.kernel_size = min(conf.sf * 4 + 3, 21)

        # DIP model
        _, C, H, W = self.lr.size()
        # Fixed noise input at the target (super-resolved) spatial size;
        # detached because only the network weights are optimized, not it.
        self.input_dip = get_noise(C, 'noise', (H * self.sf, W * self.sf)).to(device).detach()
        self.net_dip = skip(C, 3,
                            num_channels_down=[128, 128, 128, 128, 128],
                            num_channels_up=[128, 128, 128, 128, 128],
                            num_channels_skip=[16, 16, 16, 16, 16],
                            upsample_mode='bilinear',
                            need_sigmoid=True, need_bias=True, pad='reflection', act_fun='LeakyReLU')
        self.net_dip = self.net_dip.to(device)
        self.optimizer_dip = torch.optim.Adam([{'params': self.net_dip.parameters()}], lr=conf.dip_lr)

        # normalizing flow as kernel prior
        if conf.model == 'DIPFKP':
            # initializing the kernel to be smooth is slightly better,
            # hence the fixed seeds below
            seed = 5
            np.random.seed(seed)
            torch.manual_seed(seed)
            torch.cuda.manual_seed(seed)
            torch.backends.cudnn.benchmark = True

            self.net_kp = KernelPrior(n_blocks=5, input_size=self.kernel_size ** 2, hidden_size=min((self.sf+1)*5, 25),
                                      n_hidden=1)

            # Load the pretrained flow and freeze it: only the latent
            # kernel code is optimized, not the prior itself.
            state = torch.load(conf.path_KP)
            self.net_kp.load_state_dict(state['model_state'])
            self.net_kp = self.net_kp.to(device)
            self.net_kp.eval()
            for p in self.net_kp.parameters(): p.requires_grad = False

            # Latent code sampled from the flow's base distribution; this
            # is the (only) trainable kernel parameter in DIPFKP.
            self.kernel_code = self.net_kp.base_dist.sample((1, 1)).to(device)
            self.kernel_code.requires_grad = True

            self.optimizer_kp = SphericalOptimizer(self.kernel_size, torch.optim.Adam, [self.kernel_code],
                                                   lr=conf.kp_lr)

        # baseline, softmax as kernel prior
        elif conf.model == 'DIPSoftmax':
            # Uniform initialization over all kernel entries.
            self.kernel_code =torch.ones(self.kernel_size ** 2).to(device)
            self.kernel_code.requires_grad = True

            self.optimizer_kp = torch.optim.Adam([{'params': self.kernel_code}], lr=conf.kp_lr)

        # fc layers as kernel prior, according to Double-DIP/Selfdeblur, set lr = 1e-4
        elif conf.model == 'DoubleDIP':
            # Fixed random code fed through a small fully-connected net
            # that produces the kernel; here the net is what gets trained.
            n_k = 200
            self.kernel_code = get_noise(n_k, 'noise', (1, 1)).detach().squeeze().to(device)

            self.net_kp = fcn(n_k, self.kernel_size ** 2).to(device)

            self.optimizer_kp = torch.optim.Adam([{'params': self.net_kp.parameters()}], lr=1e-4)

        # loss
        self.ssimloss = SSIM().to(device)
        self.mse = torch.nn.MSELoss().to(device)

        print('*' * 60 + '\nSTARTED {} on: {}...'.format(conf.model, conf.input_image_path))
# Exemple #4
# (score: 0)
def train_inverted_net(args):
    """Train an inversion network mapping generated images back to noise.

    When an in-process generator is available, (noise, image) pairs are
    sampled on the fly as image = G(z).  Otherwise (ProGAN/StyleGAN),
    pre-sampled pairings are loaded from a CSV on disk.  Progress images /
    predicted noise vectors are dumped every iteration, and the trained
    model is checkpointed to args.save_dir.

    Args:
        args: namespace with model / invert_model names, gpu_ids, device,
            batch_size, num_invert_epochs, save_dir, optimizer settings,
            and (for BigGAN) truncation.

    Returns:
        The trained inversion model (moved to CPU by checkpointing).
    """
    # Start by training an external model on samples of G(z) -> z inversion
    model = util.get_invert_model(args)

    model = nn.DataParallel(model, args.gpu_ids)
    model = model.to(args.device)
    print(f'{args.invert_model} num params {count_parameters(model)}')

    generator = util.get_model(args)
    if generator is not None:
        generator = nn.DataParallel(generator, args.gpu_ids)
        generator = generator.to(args.device)
        print(f'{args.model} num params {count_parameters(generator)}')
    else:
        # Load saved pairings (ProGAN/StyleGAN)
        pairing_dir = '/deep/group/gen-eval/model-training/src/GAN_models/stylegan'
        pairing_path = f'{pairing_dir}/otavio_sampled_output/pairing.csv'
        pairings = pd.read_csv(pairing_path)

        num_pairings = len(pairings)
        noise_targets = pairings['noise']
        image_inputs = pairings['image']

    if 'BigGAN' in args.model:
        # BigGAN is class-conditional; condition every sample on ImageNet
        # class 207. NOTE(review): hard-coded class id — confirm intended.
        class_vector = one_hot_from_int(207, batch_size=args.batch_size)
        class_vector = torch.from_numpy(class_vector)
        class_vector = class_vector.cuda()

    # TODO: remove bc cant use gpu in laoder i don't think
    #loader = get_loader(args, phase='invert')

    #logger = TestLogger(args)
    #logger.log_hparams(args)

    criterion = torch.nn.MSELoss().to(args.device)
    optimizer = util.get_optimizer(model.parameters(), args)

    for i in range(args.num_invert_epochs):
        if generator is not None:
            # Fresh (noise, image) pair from the in-process generator.
            noise_target = util.get_noise(args)

            image_input = generator.forward(noise_target).float()
            image_input = (image_input + 1.) / 2.  # [-1, 1] -> [0, 1]
        else:
            # TODO: make into loader
            idx = i % num_pairings
            noise_target = np.load(f'{pairing_dir}/{noise_targets[idx]}')
            noise_target = torch.from_numpy(noise_target).float()
            print(f'noise target shape {noise_target.shape}')

            image_input = np.array(
                Image.open(f'{pairing_dir}/{image_inputs[idx]}'))
            image_input = torch.from_numpy(image_input / 255.)
            image_input = image_input.float().unsqueeze(0)
            image_input = image_input.permute(0, 3, 1, 2)  # NHWC -> NCHW

        noise_target = noise_target.cuda()
        image_input = image_input.cuda()

        with torch.set_grad_enabled(True):
            probs = model.forward(image_input)

            # BUG FIX: removed a dead `loss = torch.zeros(...)` that was
            # immediately overwritten by the criterion call below.
            loss = criterion(probs, noise_target)
            print(f'iter {i}: loss = {loss}')

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        # Runs every iteration; raise the modulus to save less often.
        if i % 1 == 0:
            corres_image_input = image_input.detach().cpu()
            corres_np = util.convert_image_from_tensor(corres_image_input)

            # Run check - saving image
            if 'BigGAN' in args.model:
                # BUG FIX: `truncation` was an undefined name; the value
                # lives on args (as for the class_vector setup above).
                # TODO(review): this branch computes predicted_image but
                # never saves it — mirror the else-branch saving logic.
                predicted_image = generator.forward(probs, class_vector,
                                                    args.truncation).float()
            else:
                if generator is not None:
                    predicted_image = generator.forward(probs).float()

                    predicted_image = predicted_image.detach().cpu()
                    predicted_image = (predicted_image + 1) / 2.
                    predicted_np = util.convert_image_from_tensor(
                        predicted_image)

                    # Drop the batch dimension before composing the
                    # side-by-side (predicted | target) visualization.
                    if len(predicted_np.shape) == 4:
                        predicted_np = predicted_np[0]
                        corres_np = corres_np[0]
                    visuals = util.concat_images([predicted_np, corres_np])
                    visuals_pil = Image.fromarray(visuals)
                    timestamp = datetime.now().strftime('%b%d_%H%M%S%f')
                    visuals_image_dir = f'predicted_inversion_images/{args.model}'
                    os.makedirs(visuals_image_dir, exist_ok=True)
                    visuals_image_path = f'{visuals_image_dir}/{timestamp}_{i}.png'
                    visuals_pil.save(visuals_image_path)

                    print(f'Saved {visuals_image_path}')
                else:
                    # Save noise vector - do forward separately in tf env
                    probs = probs.detach().cpu().numpy()
                    pred_noise_dir = f'predicted_inversion_noise/{args.model}'
                    os.makedirs(pred_noise_dir, exist_ok=True)

                    pred_noise_path = f'{pred_noise_dir}/{args.model}_noise_{i}.npy'
                    np.save(pred_noise_path, probs)

                    print(f'Saved {pred_noise_path}')

        # Separately dump the raw input image for this iteration.
        if i % 1 == 0:
            corres_image_input = image_input.detach().cpu()
            corres_np = util.convert_image_from_tensor(corres_image_input)

            if len(corres_np.shape) == 4:
                corres_np = corres_np[0]

            corres_pil = Image.fromarray(corres_np)
            timestamp = datetime.now().strftime('%b%d_%H%M%S%f')
            corres_image_dir = f'generated_images/{args.model}'
            os.makedirs(corres_image_dir, exist_ok=True)
            corres_image_path = f'{corres_image_dir}/{timestamp}_{i}.png'
            corres_pil.save(corres_image_path)

    # saver = ModelSaver(args)
    global_step = args.num_invert_epochs
    ckpt_dict = {
        'ckpt_info': {
            'global_step': global_step
        },
        'model_name': model.module.__class__.__name__,
        'model_args': model.module.args_dict(),
        'model_state': model.to('cpu').state_dict(),
        'optimizer': optimizer.state_dict(),
    }

    ckpt_dir = os.path.join(args.save_dir, f'{args.model}')
    os.makedirs(ckpt_dir, exist_ok=True)
    ckpt_path = os.path.join(
        ckpt_dir, f'{args.invert_model}_step_{global_step}.pth.tar')
    torch.save(ckpt_dict, ckpt_path)
    print(f'Saved model to {ckpt_path}')

    # BUG FIX: removed leftover `pdb.set_trace()` that halted every run
    # right after checkpointing.
    return model