Ejemplo n.º 1
0
def test_simple(args):
    """Run one forward pass of the generator on a single image and show it."""
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

    # Build the generator and restore its trained weights (CPU-mapped load).
    G = Generator(args.input_nc,
                  args.output_nc,
                  args.ngf,
                  args.nz,
                  n_blocks=8).to(device)
    state = torch.load(args.modelG_state_path,
                       map_location=lambda storage, loc: storage)
    G.load_state_dict(state)
    G.eval()

    # Prepare the conditioning image and one random latent code.
    source = Image.open(args.input_img_path).convert('RGB')
    batch = get_input_tensor(source).unsqueeze(0).to(device)
    z = sample_z(1, args.nz, 'gauss').to(device)

    with torch.no_grad():
        started = time.time()
        out = G(batch, z)
        finished = time.time()
        print('elapsed: {}'.format(finished - started))

    # Convert back to a PIL image and display it.
    result = toPIL(denormalize(out.squeeze()).cpu())
    result.show()
def _combined_threshold(image):
    """Merge the color, absolute-gradient and directional-gradient masks."""
    color_thresholded, _h, _l, _s = color_threshold(image)
    xy_thresholded, _x, _y = absolute_threshold_(image)
    _gd, g_thresholded, _d = gradient_threshold(image)

    # A pixel passes when any of the three detectors fired.
    merged = (color_thresholded == 1) | (xy_thresholded == 1) | (g_thresholded == 1)
    merged = util.denormalize(merged)

    return merged, color_thresholded, xy_thresholded, g_thresholded
Ejemplo n.º 3
0
def test_recursive(args):
    """Feed the generator its own output in a loop, showing frames via OpenCV."""
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

    # Build the generator and restore its trained weights (CPU-mapped load).
    G = Generator(args.input_nc,
                  args.output_nc,
                  args.ngf,
                  args.nz,
                  n_blocks=8).to(device)
    state = torch.load(args.modelG_state_path,
                       map_location=lambda storage, loc: storage)
    G.load_state_dict(state)
    G.eval()

    source = Image.open(args.input_img_path).convert('RGB')
    input_tensor = get_input_tensor(source).unsqueeze(0).to(device)
    z_random = sample_z(1, args.nz, 'gauss').to(device)

    cap = cv2.VideoCapture(0)
    while cap.isOpened():
        with torch.no_grad():
            started = time.time()
            out = G(input_tensor, z_random)
            finished = time.time()
            print('elapsed: {}'.format(finished - started))

            # CHW tensor -> HWC numpy, then RGB -> BGR for OpenCV display.
            frame = denormalize(out.squeeze()).cpu().numpy().transpose(1, 2, 0)
            frame = frame[:, :, ::-1]
            cv2.imshow('Result', frame)

            # Recurse: the next input is the current output, with a fresh
            # latent code each step.
            input_tensor = out
            z_random = sample_z(1, args.nz, 'gauss').to(device)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cv2.destroyAllWindows()
Ejemplo n.º 4
0
    def save_video(self, video_dir, global_step):
        """Roll the generator 450 frames from a fixed seed image and encode an mp4."""
        output_dir = os.path.join(video_dir, 'step_{}'.format(global_step))
        os.mkdir(output_dir)

        # Fixed seed frame, resized to the model's input resolution.
        seed = Image.open('imgs/test.png').convert('RGB').resize(
            (self.img_size, self.img_size), Image.BICUBIC)
        frame = get_input_tensor(seed).unsqueeze(0).to(self.device)

        self.G.eval()
        for idx in range(450):
            with torch.no_grad():
                out = self.G(frame)

            # Save the denormalized frame as a zero-padded png.
            toPIL(denormalize(out.squeeze()).cpu()).save(
                '{0}/{1:04d}.png'.format(output_dir, idx))

            # The next input is the current output.
            frame = out

        self.G.train()

        # Stitch the frames into a 30 fps mp4 with ffmpeg.
        cmd = 'ffmpeg -r 30 -i {}/%04d.png -vcodec libx264 -pix_fmt yuv420p -r 30 {}/movie.mp4'.format(
            output_dir, output_dir)
        subprocess.call(cmd.split())
Ejemplo n.º 5
0
    def optimize(self, A, B, global_step):
        """Run one pix2pix-style training step on a paired batch (A, B).

        Phase 1 updates the discriminator on the real pair (A, B) versus
        the detached fake pair (A, G(A)); phase 2 updates the generator
        with the GAN loss plus an L1 term weighted by ``self.lambda_l1``.
        Image grids and scalar losses are written to ``self.writer``.

        Args:
            A: input-domain (conditioning) image batch.
            B: target-domain image batch paired with ``A``.
            global_step: global iteration counter, used for logging.
        """
        A = A.to(self.device)
        B = B.to(self.device)

        # Logging the input images
        if global_step % self.log_freq == 0:
            log_real_A = torchvision.utils.make_grid(A)
            log_real_A = denormalize(log_real_A)
            self.writer.add_image('real_A', log_real_A, global_step)

            log_real_B = torchvision.utils.make_grid(B)
            log_real_B = denormalize(log_real_B)
            self.writer.add_image('real_B', log_real_B, global_step)

        # Forward pass
        fake_B = self.G(A)

        if global_step % self.log_freq == 0:
            log_fake_B = torchvision.utils.make_grid(fake_B)
            log_fake_B = denormalize(log_fake_B)
            self.writer.add_image('fake_B', log_fake_B, global_step)

        # ==================================================================
        # 1. Train D
        # ==================================================================
        self._set_requires_grad(self.D, True)

        # Real pair: D should classify (A, B) as real (target=1).
        real_pair = torch.cat([A, B], dim=1)
        real_D = self.D(real_pair)
        loss_real_D = gan_loss(real_D, target=1)

        # Fake pair: detached so no gradients flow into G during the D update.
        fake_pair = torch.cat([A, fake_B], dim=1)
        fake_D = self.D(fake_pair.detach())
        loss_fake_D = gan_loss(fake_D, target=0)

        # Halve the sum so D's effective learning rate matches G's.
        loss_D = (loss_real_D + loss_fake_D) * 0.5

        self._all_zero_grad()
        loss_D.backward()
        self.optim_D.step()

        # Logging
        self.writer.add_scalar('loss/loss_D', loss_D.item(), global_step)

        # ==================================================================
        # 2. Train G
        # ==================================================================
        # Freeze D so the generator update does not accumulate D gradients.
        self._set_requires_grad(self.D, False)

        # G tries to make D label the (non-detached) fake pair as real.
        fake_D2 = self.D(fake_pair)

        loss_G_GAN = gan_loss(fake_D2, target=1)
        loss_G_L1 = l1_loss(fake_B, B)
        loss_G = loss_G_GAN + loss_G_L1 * self.lambda_l1

        self._all_zero_grad()
        loss_G.backward()
        self.optim_G.step()

        # Logging
        self.writer.add_scalar('loss/loss_G_GAN', loss_G_GAN.item(),
                               global_step)
        self.writer.add_scalar('loss/loss_G_L1', loss_G_L1.item(), global_step)
        self.writer.add_scalar('loss/loss_G', loss_G.item(), global_step)
Ejemplo n.º 6
0
    def optimize(self, A, B, global_step):
        """Run one BicycleGAN training step on a paired batch (A, B).

        The batch is split in half: the first half drives the cVAE-GAN
        branch (latent encoded from B) and the second half the cLR-GAN
        branch (random latent). Three phases: (1) update both
        discriminators, (2) update E and G with the GAN + KL +
        image-reconstruction losses, (3) update G alone with the
        latent-reconstruction loss. Everything is logged to
        ``self.writer``.

        Args:
            A: input-domain image batch.
            B: target-domain image batch paired with ``A``.
            global_step: global iteration counter, used for logging.
        """
        # Need at least two samples so each branch gets a non-empty half.
        if A.size(0) <= 1:
            return

        A = A.to(self.device)
        B = B.to(self.device)

        cVAE_data = {'A': A[0:self.half_size], 'B': B[0:self.half_size]}
        cLR_data = {'A': A[self.half_size:], 'B': B[self.half_size:]}

        # Logging the input images
        log_imgs = torch.cat([cVAE_data['A'], cVAE_data['B']], 0)
        log_imgs = torchvision.utils.make_grid(log_imgs)
        log_imgs = denormalize(log_imgs)
        self.writer.add_image('cVAE_input', log_imgs, global_step)

        log_imgs = torch.cat([cLR_data['A'], cLR_data['B']], 0)
        log_imgs = torchvision.utils.make_grid(log_imgs)
        log_imgs = denormalize(log_imgs)
        self.writer.add_image('cLR_input', log_imgs, global_step)

        # ----------------------------------------------------------------
        # 1. Train D
        # ----------------------------------------------------------------

        # -----------------------------
        # Optimize D in cVAE-GAN
        # -----------------------------
        # Encoded latent vector via the reparameterization trick.
        mu, logvar = self.E(cVAE_data['B'])
        std = torch.exp(logvar / 2)
        random_z = sample_z(self.half_size, self.nz, 'gauss').to(self.device)
        encoded_z = (random_z * std) + mu

        # Generate fake image
        fake_img_cVAE = self.G(cVAE_data['A'], encoded_z)
        log_imgs = torchvision.utils.make_grid(fake_img_cVAE)
        log_imgs = denormalize(log_imgs)
        self.writer.add_image('cVAE_fake_encoded', log_imgs, global_step)

        real_pair_cVAE = torch.cat([cVAE_data['A'], cVAE_data['B']], dim=1)
        fake_pair_cVAE = torch.cat([cVAE_data['A'], fake_img_cVAE], dim=1)

        real_D_cVAE_1, real_D_cVAE_2 = self.D_cVAE(real_pair_cVAE)
        fake_D_cVAE_1, fake_D_cVAE_2 = self.D_cVAE(fake_pair_cVAE.detach())

        # The loss for small patch & big patch
        loss_D_cVAE_1 = mse_loss(real_D_cVAE_1, target=1) + mse_loss(
            fake_D_cVAE_1, target=0)
        loss_D_cVAE_2 = mse_loss(real_D_cVAE_2, target=1) + mse_loss(
            fake_D_cVAE_2, target=0)

        self.writer.add_scalar('loss/loss_D_cVAE_1', loss_D_cVAE_1.item(),
                               global_step)
        self.writer.add_scalar('loss/loss_D_cVAE_2', loss_D_cVAE_2.item(),
                               global_step)

        # -----------------------------
        # Optimize D in cLR-GAN
        # -----------------------------
        # Generate fake image
        fake_img_cLR = self.G(cLR_data['A'], random_z)
        log_imgs = torchvision.utils.make_grid(fake_img_cLR)
        log_imgs = denormalize(log_imgs)
        self.writer.add_image('cLR_fake_random', log_imgs, global_step)

        real_pair_cLR = torch.cat([cLR_data['A'], cLR_data['B']], dim=1)
        # BUG FIX: the fake pair must condition on the cLR half of the batch
        # (the original paired fake_img_cLR with cVAE_data['A'], a copy-paste
        # mistake that also breaks when the two halves differ in size).
        fake_pair_cLR = torch.cat([cLR_data['A'], fake_img_cLR], dim=1)

        real_D_cLR_1, real_D_cLR_2 = self.D_cLR(real_pair_cLR)
        fake_D_cLR_1, fake_D_cLR_2 = self.D_cLR(fake_pair_cLR.detach())

        # Loss for small patch & big patch
        loss_D_cLR_1 = mse_loss(real_D_cLR_1, target=1) + mse_loss(
            fake_D_cLR_1, target=0)
        loss_D_cLR_2 = mse_loss(real_D_cLR_2, target=1) + mse_loss(
            fake_D_cLR_2, target=0)

        # BUG FIX: log the cLR losses here (the original re-logged the cVAE
        # losses under the cVAE tags a second time).
        self.writer.add_scalar('loss/loss_D_cLR_1', loss_D_cLR_1.item(),
                               global_step)
        self.writer.add_scalar('loss/loss_D_cLR_2', loss_D_cLR_2.item(),
                               global_step)

        loss_D = loss_D_cVAE_1 + loss_D_cVAE_2 + loss_D_cLR_1 + loss_D_cLR_2
        self.writer.add_scalar('loss/loss_D', loss_D.item(), global_step)

        # -----------------------------
        # Update D
        # -----------------------------
        self.all_zero_grad()
        loss_D.backward()
        self.optim_D_cVAE.step()
        self.optim_D_cLR.step()

        # ----------------------------------------------------------------
        # 2. Train G & E
        # ----------------------------------------------------------------

        # -----------------------------
        # GAN loss
        # -----------------------------
        # Re-encode and regenerate so the fresh graph reaches E and G.
        mu, logvar = self.E(cVAE_data['B'])
        std = torch.exp(logvar / 2)
        random_z = sample_z(self.half_size, self.nz, 'gauss').to(self.device)
        encoded_z = (random_z * std) + mu

        # Generate fake image
        fake_img_cVAE = self.G(cVAE_data['A'], encoded_z)
        fake_pair_cVAE = torch.cat([cVAE_data['A'], fake_img_cVAE], dim=1)

        # Fool D_cVAE
        fake_D_cVAE_1, fake_D_cVAE_2 = self.D_cVAE(fake_pair_cVAE)

        # Loss for small patch & big patch
        loss_G_cVAE_1 = mse_loss(fake_D_cVAE_1, target=1)
        loss_G_cVAE_2 = mse_loss(fake_D_cVAE_2, target=1)

        # Random latent vector and generate fake image
        random_z = sample_z(self.half_size, self.nz, 'gauss').to(self.device)
        fake_img_cLR = self.G(cLR_data['A'], random_z)
        fake_pair_cLR = torch.cat([cLR_data['A'], fake_img_cLR], dim=1)

        # Fool D_cLR
        fake_D_cLR_1, fake_D_cLR_2 = self.D_cLR(fake_pair_cLR)

        # Loss for small patch & big patch
        loss_G_cLR_1 = mse_loss(fake_D_cLR_1, target=1)
        loss_G_cLR_2 = mse_loss(fake_D_cLR_2, target=1)

        loss_G = loss_G_cVAE_1 + loss_G_cVAE_2 + loss_G_cLR_1 + loss_G_cLR_2
        self.writer.add_scalar('loss/loss_G', loss_G.item(), global_step)

        # -----------------------------
        # KL-divergence (cVAE-GAN)
        # -----------------------------
        kl_div = torch.sum(
            0.5 * (mu**2 + torch.exp(logvar) - logvar - 1)) * self.lambda_kl
        self.writer.add_scalar('loss/kl_div', kl_div.item(), global_step)

        # -----------------------------
        # Reconstruction of image B (|G(A, z) - B|) (cVAE-GAN)
        # -----------------------------
        loss_img_recon = l1_loss(fake_img_cVAE,
                                 cVAE_data['B']) * self.lambda_img
        self.writer.add_scalar('loss/loss_img_recon', loss_img_recon.item(),
                               global_step)

        loss_E_G = loss_G + kl_div + loss_img_recon
        self.writer.add_scalar('loss/loss_E_G', loss_E_G.item(), global_step)

        # -----------------------------
        # Update E & G
        # -----------------------------
        # retain_graph: fake_img_cLR's graph is reused by the z-recon step.
        self.all_zero_grad()
        loss_E_G.backward(retain_graph=True)
        self.optim_E.step()
        self.optim_G.step()

        # ----------------------------------------------------------------
        # 3. Train only G
        # ----------------------------------------------------------------

        # -----------------------------
        # Reconstruction of random latent code (|E(G(A, z)) - z|) (cLR-GAN)
        # -----------------------------
        # This step should update only G.
        # See https://github.com/junyanz/BicycleGAN/issues/5 for details.
        mu, logvar = self.E(fake_img_cLR)

        loss_z_recon = l1_loss(mu, random_z) * self.lambda_z
        self.writer.add_scalar('loss/loss_z_recon', loss_z_recon.item(),
                               global_step)

        # -----------------------------
        # Update G
        # -----------------------------
        self.all_zero_grad()
        loss_z_recon.backward()
        self.optim_G.step()
Ejemplo n.º 7
0
def main():
    """Train the watermark-removal (inpainting) model, optionally with GAN loss.

    Builds the generator and discriminator, restores a checkpoint when
    ``--resume`` is given, then runs the epoch loop: per batch it computes
    reconstruction (L1 + SSIM) and mask (BCE) losses, optionally the
    adversarial losses, and logs scalars/images/gradient plots to
    TensorBoard. A checkpoint is written every epoch and on failure.
    """
    args = parse_arg()
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)
    writer = SummaryWriter(os.path.join(args.save_dir, 'tb'))
    device = torch.device('cuda') if args.use_cuda else torch.device('cpu')

    model = net.InPainting(args.use_cuda, mask_clipping=False).to(device)
    model_dis = net.Discriminator().to(device)
    model.train()
    model_dis.train()
    # The fine painter stays frozen; everything else is trainable.
    train_params = [
        p for (n, p) in model.named_parameters() if 'fine_painter' not in n
    ]
    optimizer = torch.optim.Adam(train_params, lr=args.lr)
    optimizer_dis = torch.optim.Adam(model_dis.parameters(), lr=args.lr)
    if args.resume:
        print('Resume training from ', args.resume)
        ckpt = torch.load(args.resume)
        try:
            model.load_state_dict(ckpt['ckpt'])
            # BUG FIX: the discriminator weights were loaded into `model`
            # instead of `model_dis` in the original.
            model_dis.load_state_dict(ckpt['ckpt_dis'])
            optimizer.load_state_dict(ckpt['optim'])
            optimizer_dis.load_state_dict(ckpt['optim_dis'])
        except Exception:
            print(traceback.format_exc())
            print('Missing keys')
            # Fall back to restoring only the encoder weights.
            model.load_state_dict(
                {k: v
                 for k, v in ckpt['ckpt'].items() if 'encoder' in k},
                strict=False)
    if args.fix_mask:
        print('fix mask prediction')
        for p in model.encoder.parameters():
            p.requires_grad = False
    elif args.fix_recon:
        print('fix painter')
        for p in model.painter.parameters():
            p.requires_grad = False

    loss = torch.nn.L1Loss()
    loss_bce = torch.nn.BCELoss()
    ssim_window = ssim.create_window(11, 3).to(device)

    data = dataset.LargeScaleWatermarkDataset(
        folder_origin=os.path.join(args.path, args.path_origin),
        folder_watermarked=os.path.join(args.path, args.path_wm),
        anno_file=os.path.join(args.path, args.path_anno))
    train_loader = torch.utils.data.DataLoader(dataset=data,
                                               batch_size=args.batchsize,
                                               shuffle=True,
                                               num_workers=4)

    try:
        batch_per_epoch = len(train_loader)
        best_loss = 100
        for i in range(args.epochs):
            epoch_loss = 0
            print('Epoch %d' % i)
            for j, item in enumerate(train_loader):
                img_raw, img_wm, mask_wm = item
                img_raw, img_wm, mask_wm = img_raw.to(device), img_wm.to(
                    device), mask_wm.to(device)
                mask, recon = model(img_wm)
                # Adversarial branch: compute D loss (on detached recon) and
                # the G loss routed through D.
                if args.gan_method:
                    # optimize D
                    dis_real, dis_recon = dis_forward(model_dis, img_raw,
                                                      recon.detach())
                    dis_wm = torch.sigmoid(model_dis(img_wm))
                    assert dis_recon.size() == dis_wm.size()
                    dis_fake = 0.5 * (dis_recon + dis_wm)
                    loss_disc = torch.mean(-1 * torch.log(1 - dis_fake) -
                                           torch.log(dis_real))
                    loss_gp = net.calc_gradient_penalty(
                        model_dis, img_raw, recon.detach())
                    loss_d = loss_gp + loss_disc
                    # optimize G through D
                    dis_real, dis_recon = dis_forward(model_dis, img_raw,
                                                      recon)
                    loss_g = 0.001 * torch.mean(-1 * torch.log(dis_recon))

                loss_mask_reg = 0.1 * mask.clamp(0, 1).mean()
                try:
                    loss_mask = loss_bce(mask.clamp(0., 1.),
                                         mask_wm.float().clamp(0., 1.))
                except Exception:
                    import pdb
                    pdb.set_trace()
                    # BUG FIX: `&` binds tighter than the comparisons, so the
                    # original `(mask >= 0. & mask <= 1.)` did not test the
                    # intended range; each comparison must be parenthesized.
                    if not ((mask >= 0.) & (mask <= 1.)).all():
                        print('错误出在生成的mask')
                    if not ((mask_wm >= 0.) & (mask_wm <= 1.)).all():
                        print('错误出在gt水印')
                loss_recon = loss(recon, img_raw)
                loss_ssim = 1 - ssim._ssim(0.5 * (1 + img_raw), 0.5 *
                                           (1 + recon), ssim_window, 11, 3,
                                           True)
                loss_ = loss_recon + loss_mask + loss_ssim
                if args.gan_method:
                    loss_ += loss_g
                    optimizer_dis.zero_grad()
                    loss_d.backward()
                    optimizer_dis.step()

                optimizer.zero_grad()
                loss_.backward()
                optimizer.step()

                epoch_loss += loss_.item()
                step = i * batch_per_epoch + j
                if j % 5 == 0:
                    writer.add_scalars(
                        'loss', {
                            'recon_l1': loss_recon.item(),
                            'ssim': loss_ssim.item(),
                            'exclusion': loss_mask.item(),
                            'mask_reg': loss_mask_reg.item()
                        }, step)
                if j % 10 == 0:
                    print(
                        'Loss: %.3f (recon: %.3f \t ssim: %.3f \t mask: %.3f \t)'
                        % (loss_.item(), loss_recon.item(), loss_ssim.item(),
                           loss_mask.item()))
                    if args.gan_method:
                        print(
                            'disc: %.3f \t gp: %.3f \t gen: %.3f' %
                            (loss_disc.item(), loss_gp.item(), loss_g.item()))
                # Log predicted/GT masks next to the input and reconstruction.
                if j % 50 == 0:
                    writer.add_images('images', [
                        torch.cat(3 * [mask[0].float().to(device)]),
                        torch.cat(
                            3 * [mask_wm[0].float().to(device).unsqueeze(0)]),
                        util.denormalize(img_wm[0]),
                        util.denormalize(recon[0]).clamp(0, 1)
                    ],
                                      global_step=step,
                                      dataformats='CHW')
                # Plot per-layer gradient distributions.
                if j % 100 == 0:
                    writer.add_figure('grad_flow',
                                      util.plot_grad_flow_v2(
                                          model.named_parameters()),
                                      global_step=step)
                    if args.gan_method:
                        writer.add_figure('discriminator_grad',
                                          util.plot_grad_flow_v2(
                                              model_dis.named_parameters()),
                                          global_step=step)
            # BUG FIX: also save the discriminator and its optimizer so the
            # resume path above can actually restore them.
            ckpt = {
                'ckpt': model.state_dict(),
                'ckpt_dis': model_dis.state_dict(),
                'optim': optimizer.state_dict(),
                'optim_dis': optimizer_dis.state_dict()
            }
            torch.save(ckpt, os.path.join(args.save_dir, 'latest.pth'))
            # Keep a copy of every new best (lowest mean loss) epoch weight.
            if epoch_loss / (j + 1) < best_loss:
                best_loss = epoch_loss / (j + 1)
                shutil.copy(
                    os.path.join(args.save_dir, 'latest.pth'),
                    os.path.join(args.save_dir, 'epoch_' + str(i) + '.pth'))
    except Exception as e:
        # Best-effort checkpoint on any failure, then exit.
        ckpt = {
            'ckpt': model.state_dict(),
            'ckpt_dis': model_dis.state_dict(),
            'optim': optimizer.state_dict(),
            'optim_dis': optimizer_dis.state_dict()
        }
        torch.save(ckpt, os.path.join(args.save_dir, 'latest.pth'))
        print('Save temporary checkpoints to %s' % args.save_dir)
        print(str(e), traceback.print_exc())
        sys.exit(0)
    print('Done training.')
    shutil.copyfile(os.path.join(args.save_dir, 'latest.pth'),
                    os.path.join(args.save_dir, 'epoch_%d.pth' % (i + 1)))
Ejemplo n.º 8
0
    def PredictMotion(self):
        """Generate a looping motion sequence from the single input image.

        Loads the trained conditional motion network and a latent
        codebook, interpolates a motion code from ``self.t_m``, then
        iteratively warps the input frame with the predicted flow fields.

        Returns:
            (V_mloop, V_f): the looped warped frames and the per-frame
            flow visualizations (HSV-encoded, converted to BGR).
        """
        print('Motion: ')
        # Restore the conditional motion network weights.
        P_m = ConditionalMotionNet()
        param = torch.load(self.model_path + '/PMNet_weight_' +
                           self.model_epoch + '.pth')
        P_m.load_state_dict(param)
        if self.gpu > -1:
            P_m.cuda(self.gpu)

        # Motion-code codebook ('latin1' handles Python-2 pickles on py3).
        with open(self.model_path + '/codebook_m_' + self.model_epoch + '.pkl',
                  'rb') as f:
            codebook_m = pickle.load(
                f) if sys.version_info[0] == 2 else pickle.load(
                    f, encoding='latin1')

        # Linearly interpolate between the two codebook entries bracketing
        # the continuous index (len - 1) * t_m.
        id1 = int(np.floor((len(codebook_m) - 1) * self.t_m))
        id2 = int(np.ceil((len(codebook_m) - 1) * self.t_m))
        z_weight = (len(codebook_m) - 1) * self.t_m - np.floor(
            (len(codebook_m) - 1) * self.t_m)
        z_m = (1. - z_weight
               ) * codebook_m[id1:id1 + 1] + z_weight * codebook_m[id2:id2 + 1]
        z_m = Variable(torch.from_numpy(z_m.astype(np.float32)))
        if self.gpu > -1:
            z_m = z_m.cuda(self.gpu)
        # Identity sampling grid over the padded frame, coords in [-1, 1].
        initial_coordinate = np.array([
            np.meshgrid(np.linspace(-1, 1, self.w + 2 * self.pad),
                        np.linspace(-1, 1, self.h + 2 * self.pad),
                        sparse=False)
        ]).astype(np.float32)
        initial_coordinate = Variable(torch.from_numpy(initial_coordinate))
        if self.gpu > -1:
            initial_coordinate = initial_coordinate.cuda(self.gpu)

        with torch.no_grad():

            # Working-resolution input (w x h), later reflection-padded.
            test_img = cv2.imread(self.input_path)
            test_img = cv2.resize(test_img, (self.w, self.h))
            test_input = np.array([normalize(test_img)])
            test_input = Variable(
                torch.from_numpy(test_input.transpose(0, 3, 1, 2)))
            if self.gpu > -1:
                test_input = test_input.cuda(self.gpu)
            padded_test_input = F.pad(test_input,
                                      (self.pad, self.pad, self.pad, self.pad),
                                      mode='reflect')

            # Full-resolution copy used to render the output frames; the
            # output size defaults to the source image size when unset.
            test_img_large = cv2.imread(self.input_path)
            if self.fw == None or self.fh == None:
                self.fh, self.fw = test_img_large.shape[:2]
            test_img_large = cv2.resize(test_img_large, (self.fw, self.fh))
            padded_test_input_large = np.array([normalize(test_img_large)])
            padded_test_input_large = Variable(
                torch.from_numpy(padded_test_input_large.transpose(0, 3, 1,
                                                                   2)))
            if self.gpu > -1:
                padded_test_input_large = padded_test_input_large.cuda(
                    self.gpu)
            # Pad amounts scaled to the full output resolution.
            scaled_pads = (int(self.pad * self.fh / float(self.h)),
                           int(self.pad * self.fw / float(self.w)))
            padded_test_input_large = F.pad(padded_test_input_large,
                                            (scaled_pads[1], scaled_pads[1],
                                             scaled_pads[0], scaled_pads[0]),
                                            mode='reflect')

            V_m = list()  # warped output frames
            V_f = list()  # flow visualizations
            old_correpondence = None
            for t in range(self.TM):
                sys.stdout.write("\rProcessing frame %d, " % (t + 1))
                sys.stdout.flush()

                # Predict the flow at working resolution, rescale each axis
                # for the padded extent, pad, and apply the global motion
                # strength s_m.
                flow = P_m(test_input, z_m)
                flow[:, 0, :, :] = flow[:, 0, :, :] * (
                    self.w / float(self.pad * 2 + self.w))
                flow[:, 1, :, :] = flow[:, 1, :, :] * (
                    self.h / float(self.pad * 2 + self.h))
                flow = F.pad(flow, (self.pad, self.pad, self.pad, self.pad),
                             mode='reflect')
                flow = self.s_m * flow
                correspondence = initial_coordinate + flow

                # Compose with the previous step's correspondence so each
                # frame resamples the original padded image once.
                if old_correpondence is not None:
                    correspondence = F.grid_sample(old_correpondence,
                                                   correspondence.permute(
                                                       0, 2, 3, 1),
                                                   padding_mode='border')

                # Upsample the correspondence field and warp the
                # full-resolution padded image with it.
                correspondence_large = F.upsample(
                    correspondence,
                    size=(self.fh + scaled_pads[0] * 2,
                          self.fw + scaled_pads[1] * 2),
                    mode='bilinear',
                    align_corners=True)
                y_large = F.grid_sample(padded_test_input_large,
                                        correspondence_large.permute(
                                            0, 2, 3, 1),
                                        padding_mode='border')
                outimg = y_large.data.cpu().numpy()[0].transpose(1, 2, 0)
                outimg = denormalize(outimg)
                # Crop the padding back off the rendered frame.
                outimg = outimg[scaled_pads[0]:outimg.shape[0] -
                                scaled_pads[0],
                                scaled_pads[1]:outimg.shape[1] -
                                scaled_pads[1]]
                V_m.append(outimg)

                # Flow visualization: hue from angle, saturation from the
                # min-max-normalized magnitude, full value channel.
                outflowimg = flow.data.cpu().numpy()[0].transpose(1, 2, 0)
                outflowimg = outflowimg[self.pad:outflowimg.shape[0] -
                                        self.pad,
                                        self.pad:outflowimg.shape[1] -
                                        self.pad]
                mag, ang = cv2.cartToPolar(outflowimg[..., 1], outflowimg[...,
                                                                          0])
                hsv = np.zeros_like(test_img)
                hsv[..., 1] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
                hsv[..., 0] = ang * 180 / np.pi / 2
                hsv[..., 2] = 255
                outflowimg = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
                outflowimg = cv2.resize(outflowimg, (self.fw, self.fh))
                V_f.append(outflowimg)

                # Advance the working-resolution frame for the next step
                # (warp, then crop the padding).
                y = F.grid_sample(padded_test_input,
                                  correspondence.permute(0, 2, 3, 1),
                                  padding_mode='border')
                test_input = y[:, :, self.pad:y.shape[2] - self.pad,
                               self.pad:y.shape[3] - self.pad]
                old_correpondence = correspondence

            # Post-process the frame list into a seamless loop.
            V_mloop = generateLoop(V_m)

        return V_mloop, V_f
Ejemplo n.º 9
0
    def PredictAppearance(self, V_mloop):
        """Re-texture each looped motion frame with interpolated appearance codes.

        Encodes the first frame to an appearance latent, finds the closest
        entry of a stored latent sequence, builds a densely interpolated
        code sequence covering all loops, and applies the conditional
        appearance network frame by frame.

        Args:
            V_mloop: list of looped motion frames (numpy images).

        Returns:
            V: list of rendered output frames at full resolution.
        """
        print('\nAppearance: ', )
        minimum_loop_num = int(1 / self.s_a)
        # Restore the conditional appearance network and its encoder.
        P_a = ConditionalAppearanceNet(8)
        param = torch.load(self.model_path + '/PANet_weight_' +
                           self.model_epoch + '.pth')
        P_a.load_state_dict(param)
        if self.gpu > -1:
            P_a.cuda(self.gpu)
        E_a = define_E(3, 8, 64, which_model_netE='resnet_128', vaeLike=True)
        param = torch.load(self.model_path + '/EANet_weight_' +
                           self.model_epoch + '.pth')
        E_a.load_state_dict(param)
        if self.gpu > -1:
            E_a.cuda(self.gpu)

        with torch.no_grad():
            interpolated_za_seq = list()

            # Encode the first loop frame to get the starting latent.
            input_conditional_test = cv2.resize(V_mloop[0], (128, 128))
            input_conditional_test = np.array(
                [normalize(input_conditional_test)])
            input_conditional_test = Variable(
                torch.from_numpy(input_conditional_test.transpose(0, 3, 1, 2)))
            if self.gpu > -1:
                input_conditional_test = input_conditional_test.cuda(self.gpu)
            za_input, _ = E_a(input_conditional_test)
            interpolated_za_seq.append(za_input.clone())

            # Stored appearance latent sequence, selected by t_a
            # ('latin1' handles Python-2 pickles on py3).
            with open(
                    self.model_path + '/codebook_a_' + self.model_epoch +
                    '.pkl', 'rb') as f:
                codebook_a = pickle.load(
                    f) if sys.version_info[0] == 2 else pickle.load(
                        f, encoding='latin1')
            za_seq = codebook_a[int((len(codebook_a) - 1) * self.t_a)]
            za_seq = [torch.from_numpy(np.array([za])) for za in za_seq]
            if self.gpu > -1:
                za_seq = [za.cuda(self.gpu) for za in za_seq]
            # Find the sequence entry closest (by MSE) to the encoded latent.
            start_fid = None
            min_dist = float('inf')
            for t, mu in enumerate(za_seq):
                dist = F.mse_loss(za_input, mu).cpu().numpy()
                if dist < min_dist:
                    min_dist = dist
                    start_fid = t

            # Interpolate forward from the start entry to the sequence end.
            TA = len(za_seq)
            loop_num = max(minimum_loop_num,
                           int(np.ceil(len(za_seq) / float(len(V_mloop)))))
            interpolation_size = int((loop_num * len(V_mloop) - TA) / TA)
            za1 = za_input.clone()
            for t in range(start_fid + 1, TA):
                za2 = za_seq[t]
                for ti in range(interpolation_size):
                    lambd = (ti + 1) / float(interpolation_size + 1)
                    z = (1. - lambd) * za1 + lambd * za2
                    interpolated_za_seq.append(z)
                interpolated_za_seq.append(za2)
                za1 = za2

            # Interpolate backward from the start entry to the beginning,
            # prepending so the final list stays in sequence order.
            za1 = za_input.clone()
            for t in range(start_fid - 1, -1, -1):
                za2 = za_seq[t]
                for ti in range(interpolation_size - 1, -1, -1):
                    lambd = (ti + 1) / float(interpolation_size + 1)
                    z = (1. - lambd) * za2 + lambd * za1
                    interpolated_za_seq.insert(0, z)
                interpolated_za_seq.insert(0, za2)
                za1 = za2

            # Append a final-to-start interpolation so the code sequence
            # length covers loop_num * len(V_mloop) frames.
            loop_num = int(
                np.ceil(TA * (interpolation_size + 1) / float(len(V_mloop))))
            interpolation_size2 = int(interpolation_size +
                                      loop_num * len(V_mloop) - TA *
                                      (interpolation_size + 1))
            z_start = za_input.clone() if start_fid == 0 else za_seq[0]
            z_final = za_input.clone() if start_fid == TA - 1 else za_seq[-1]
            for ti in range(interpolation_size2):
                lambd = (ti + 1) / float(interpolation_size2 + 1)
                z = (1. - lambd) * z_final + lambd * z_start
                interpolated_za_seq.append(z)

            # Render every frame of every loop with its appearance code;
            # zaid starts at the code aligned with the starting entry and
            # wraps around the interpolated sequence.
            zaid = (interpolation_size + 1) * start_fid
            V = list()
            t = 0
            for loop in range(loop_num):
                for frame in V_mloop:
                    sys.stdout.write("\rProcessing frame %d, " % (t + 1))
                    sys.stdout.flush()
                    t += 1

                    test_input = cv2.resize(frame, (self.w, self.h))
                    test_input = np.array([normalize(test_input)])
                    test_input = Variable(
                        torch.from_numpy(test_input.transpose(0, 3, 1, 2)))
                    if self.gpu > -1:
                        test_input = test_input.cuda(self.gpu)
                    test_input_large = np.array([normalize(frame)])
                    test_input_large = Variable(
                        torch.from_numpy(test_input_large.transpose(
                            0, 3, 1, 2)))
                    if self.gpu > -1:
                        test_input_large = test_input_large.cuda(self.gpu)
                    z = interpolated_za_seq[zaid]
                    # Predict per-pixel scale (al) and offset (bl) maps at
                    # working resolution, upsample them, and apply the
                    # affine transform to the full-resolution frame.
                    y, al, bl = P_a(test_input, z)
                    al_large = F.upsample(al,
                                          size=(self.fh, self.fw),
                                          mode='bilinear',
                                          align_corners=True)
                    bl_large = F.upsample(bl,
                                          size=(self.fh, self.fw),
                                          mode='bilinear',
                                          align_corners=True)
                    y = F.tanh(al_large * test_input_large + bl_large)
                    outimg = y.data.cpu().numpy()[0].transpose(1, 2, 0)
                    V.append(denormalize(outimg))
                    zaid += 1
                    if zaid > len(interpolated_za_seq) - 1:
                        zaid = 0

        return V
Ejemplo n.º 10
0
# KL-divergence term between the encoder distribution and a unit Gaussian.
# NOTE(review): K.square(K.exp(z_log_var)) equals exp(2 * z_log_var); this
# matches the standard VAE KL term only if z_log_var holds log(sigma) rather
# than log(sigma^2) — confirm against the sampling layer's convention.
kl_loss = -0.5 * K.sum(
    1 + z_log_var - K.square(z_mean) - K.square(K.exp(z_log_var)), axis=-1)
# return the average loss over all images in batch
vae_loss = K.mean(reconstruction_loss + kl_loss)

# Compile
# NOTE(review): reduce_lr is created but never passed to model.fit's
# callbacks list below, so it currently has no effect.
reduce_lr = ReduceLROnPlateau(monitor='loss',
                              factor=0.2,
                              patience=5,
                              min_lr=0.001)
model.add_loss(vae_loss)
optimizer = optimizers.Adam(lr=learning_rate,
                            beta_1=0.9,
                            beta_2=0.999,
                            epsilon=None,
                            decay=0.0,
                            amsgrad=False)

# The loss is attached via add_loss, so compile() takes no explicit loss.
model.compile(optimizer=optimizer)

#checkpoint = ModelCheckpoint(filepath='x_best_weight.hdf5', verbose=1, save_best_only=True)

#for epoch in range(epochs):
# Unsupervised fit: add_loss models take no target array.
history = model.fit(X_train, batch_size=batch_size, epochs=epochs)

# Predicting
pred = model.predict(X[10000:15000], batch_size=20)

# Undo the input normalization and write the prediction out as audio.
pred = denormalize(pred, X_max, X_min).flatten()
set_audio('{}_{}_epoch.wav'.format(name, pct), rate, pred)