def test_simple(args):
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    g_n_blocks = 8
    G = Generator(args.input_nc, args.output_nc, args.ngf, args.nz,
                  n_blocks=g_n_blocks).to(device)
    G.load_state_dict(
        torch.load(args.modelG_state_path,
                   map_location=lambda storage, loc: storage))
    G.eval()

    input_img = Image.open(args.input_img_path).convert('RGB')
    input_tensor = get_input_tensor(input_img).unsqueeze(0).to(device)
    z_random = sample_z(1, args.nz, 'gauss').to(device)

    with torch.no_grad():
        now = time.time()
        out = G(input_tensor, z_random)
        end = time.time()
        print('elapsed: {}'.format(end - now))

    out_denormalized = denormalize(out.squeeze()).cpu()
    out_img = toPIL(out_denormalized)
    out_img.show()
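# The helpers used above (get_input_tensor, sample_z, denormalize, toPIL) are
# defined elsewhere in this repo. A minimal sketch of what they likely look
# like, assuming the usual [-1, 1] normalization convention for GAN inputs and
# outputs; these are illustrative, not the project's actual definitions:
import torch
import torchvision.transforms as transforms

toPIL = transforms.ToPILImage()

def get_input_tensor(pil_img):
    # Convert a PIL image to a CHW float tensor normalized to [-1, 1].
    t = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    return t(pil_img)

def sample_z(batch_size, nz, dist='gauss'):
    # Draw a latent code; 'gauss' samples from a standard normal.
    if dist == 'gauss':
        return torch.randn(batch_size, nz)
    return torch.rand(batch_size, nz) * 2.0 - 1.0  # uniform in [-1, 1]

def denormalize(tensor):
    # Map a [-1, 1] tensor back to [0, 1] for visualization.
    return tensor.add(1.0).mul(0.5).clamp(0.0, 1.0)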
def _combined_threshold(image):
    color_thresholded, h_thresholded, l_thresholded, s_thresholded = \
        color_threshold(image)
    xy_thresholded, x_thresholded, y_thresholded = absolute_threshold_(image)
    gd_thresholded, g_thresholded, d_thresholded = gradient_threshold(image)
    # Combine the color, absolute-gradient, and gradient-magnitude masks.
    combined = (color_thresholded == 1) | (xy_thresholded == 1) | \
               (g_thresholded == 1)
    combined = util.denormalize(combined)
    return combined, color_thresholded, xy_thresholded, g_thresholded
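# absolute_threshold_, color_threshold, and gradient_threshold are defined
# elsewhere. As a rough sketch of the pattern such helpers usually follow
# (Sobel-based binary thresholding on a grayscale image; the function name
# and threshold values here are assumptions):
import cv2
import numpy as np

def absolute_threshold_sketch(image, thresh=(20, 100)):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Absolute Sobel responses along x and y.
    abs_x = np.absolute(cv2.Sobel(gray, cv2.CV_64F, 1, 0))
    abs_y = np.absolute(cv2.Sobel(gray, cv2.CV_64F, 0, 1))

    def binarize(abs_grad):
        # Scale to 8-bit, then keep pixels inside the threshold band.
        scaled = np.uint8(255 * abs_grad / np.max(abs_grad))
        binary = np.zeros_like(scaled)
        binary[(scaled >= thresh[0]) & (scaled <= thresh[1])] = 1
        return binary

    x_binary, y_binary = binarize(abs_x), binarize(abs_y)
    xy_binary = x_binary & y_binary
    return xy_binary, x_binary, y_binary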
def test_recursive(args):
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    g_n_blocks = 8
    G = Generator(args.input_nc, args.output_nc, args.ngf, args.nz,
                  n_blocks=g_n_blocks).to(device)
    G.load_state_dict(
        torch.load(args.modelG_state_path,
                   map_location=lambda storage, loc: storage))
    G.eval()

    input_img = Image.open(args.input_img_path).convert('RGB')
    input_tensor = get_input_tensor(input_img).unsqueeze(0).to(device)
    z_random = sample_z(1, args.nz, 'gauss').to(device)

    cap = cv2.VideoCapture(0)
    while cap.isOpened():
        with torch.no_grad():
            now = time.time()
            out = G(input_tensor, z_random)
            end = time.time()
            print('elapsed: {}'.format(end - now))

        out_denormalized = denormalize(out.squeeze())
        out_denormalized = out_denormalized.cpu().numpy().transpose(1, 2, 0)
        out_denormalized = out_denormalized[:, :, ::-1]  # RGB -> BGR for OpenCV
        cv2.imshow('Result', out_denormalized)

        # Feed the output back in as the next input, with a fresh latent code.
        input_tensor = out
        z_random = sample_z(1, args.nz, 'gauss').to(device)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
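# parse_arg / the args object consumed by these test functions is defined
# elsewhere. A minimal sketch of the expected fields, with names inferred from
# usage above and defaults that are assumptions:
import argparse

def parse_test_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_nc', type=int, default=3)
    parser.add_argument('--output_nc', type=int, default=3)
    parser.add_argument('--ngf', type=int, default=64)
    parser.add_argument('--nz', type=int, default=8)
    parser.add_argument('--modelG_state_path', type=str, required=True)
    parser.add_argument('--input_img_path', type=str, required=True)
    return parser.parse_args()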
def save_video(self, video_dir, global_step):
    output_dir = os.path.join(video_dir, 'step_{}'.format(global_step))
    os.mkdir(output_dir)
    input_img = Image.open('imgs/test.png').convert('RGB').resize(
        (self.img_size, self.img_size), Image.BICUBIC)
    input_tensor = get_input_tensor(input_img).unsqueeze(0).to(self.device)

    self.G.eval()
    for i in range(450):
        with torch.no_grad():
            out = self.G(input_tensor)
        out_denormalized = denormalize(out.squeeze()).cpu()
        out_img = toPIL(out_denormalized)
        out_img.save('{0}/{1:04d}.png'.format(output_dir, i))
        input_tensor = out
    self.G.train()

    # Encode the saved frames into a 30 fps H.264 video.
    cmd = ('ffmpeg -r 30 -i {}/%04d.png -vcodec libx264 '
           '-pix_fmt yuv420p -r 30 {}/movie.mp4').format(output_dir,
                                                         output_dir)
    subprocess.call(cmd.split())
def optimize(self, A, B, global_step):
    A = A.to(self.device)
    B = B.to(self.device)

    # Logging the input images
    if global_step % self.log_freq == 0:
        log_real_A = torchvision.utils.make_grid(A)
        log_real_A = denormalize(log_real_A)
        self.writer.add_image('real_A', log_real_A, global_step)

        log_real_B = torchvision.utils.make_grid(B)
        log_real_B = denormalize(log_real_B)
        self.writer.add_image('real_B', log_real_B, global_step)

    # Forward pass
    fake_B = self.G(A)
    if global_step % self.log_freq == 0:
        log_fake_B = torchvision.utils.make_grid(fake_B)
        log_fake_B = denormalize(log_fake_B)
        self.writer.add_image('fake_B', log_fake_B, global_step)

    # ==================================================================
    # 1. Train D
    # ==================================================================
    self._set_requires_grad(self.D, True)

    # Real
    real_pair = torch.cat([A, B], dim=1)
    real_D = self.D(real_pair)
    loss_real_D = gan_loss(real_D, target=1)

    # Fake (detached so D's backward pass does not touch G's graph)
    fake_pair = torch.cat([A, fake_B], dim=1)
    fake_D = self.D(fake_pair.detach())
    loss_fake_D = gan_loss(fake_D, target=0)

    loss_D = (loss_real_D + loss_fake_D) * 0.5
    self._all_zero_grad()
    loss_D.backward()
    self.optim_D.step()

    # Logging
    self.writer.add_scalar('loss/loss_D', loss_D.item(), global_step)

    # ==================================================================
    # 2. Train G
    # ==================================================================
    self._set_requires_grad(self.D, False)

    # Fake
    fake_D2 = self.D(fake_pair)
    loss_G_GAN = gan_loss(fake_D2, target=1)
    loss_G_L1 = l1_loss(fake_B, B)
    loss_G = loss_G_GAN + loss_G_L1 * self.lambda_l1

    self._all_zero_grad()
    loss_G.backward()
    self.optim_G.step()

    # Logging
    self.writer.add_scalar('loss/loss_G_GAN', loss_G_GAN.item(), global_step)
    self.writer.add_scalar('loss/loss_G_L1', loss_G_L1.item(), global_step)
    self.writer.add_scalar('loss/loss_G', loss_G.item(), global_step)
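# gan_loss and l1_loss are imported helpers. A plausible minimal definition of
# gan_loss, assuming the standard BCE-with-logits formulation (a sketch, not
# necessarily this repo's implementation):
import torch
import torch.nn.functional as F

def gan_loss(pred, target):
    # Compare the discriminator output against an all-real (1) or
    # all-fake (0) label map of the same shape.
    label = torch.full_like(pred, float(target))
    return F.binary_cross_entropy_with_logits(pred, label)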
def optimize(self, A, B, global_step):
    if A.size(0) <= 1:
        return
    A = A.to(self.device)
    B = B.to(self.device)
    # Split the batch: the first half drives cVAE-GAN, the second cLR-GAN.
    cVAE_data = {'A': A[0:self.half_size], 'B': B[0:self.half_size]}
    cLR_data = {'A': A[self.half_size:], 'B': B[self.half_size:]}

    # Logging the input images
    log_imgs = torch.cat([cVAE_data['A'], cVAE_data['B']], 0)
    log_imgs = torchvision.utils.make_grid(log_imgs)
    log_imgs = denormalize(log_imgs)
    self.writer.add_image('cVAE_input', log_imgs, global_step)

    log_imgs = torch.cat([cLR_data['A'], cLR_data['B']], 0)
    log_imgs = torchvision.utils.make_grid(log_imgs)
    log_imgs = denormalize(log_imgs)
    self.writer.add_image('cLR_input', log_imgs, global_step)

    # ----------------------------------------------------------------
    # 1. Train D
    # ----------------------------------------------------------------
    # -----------------------------
    # Optimize D in cVAE-GAN
    # -----------------------------
    # Generate the encoded latent vector (reparameterization trick)
    mu, logvar = self.E(cVAE_data['B'])
    std = torch.exp(logvar / 2)
    random_z = sample_z(self.half_size, self.nz, 'gauss').to(self.device)
    encoded_z = (random_z * std) + mu

    # Generate fake image
    fake_img_cVAE = self.G(cVAE_data['A'], encoded_z)
    log_imgs = torchvision.utils.make_grid(fake_img_cVAE)
    log_imgs = denormalize(log_imgs)
    self.writer.add_image('cVAE_fake_encoded', log_imgs, global_step)

    real_pair_cVAE = torch.cat([cVAE_data['A'], cVAE_data['B']], dim=1)
    fake_pair_cVAE = torch.cat([cVAE_data['A'], fake_img_cVAE], dim=1)

    real_D_cVAE_1, real_D_cVAE_2 = self.D_cVAE(real_pair_cVAE)
    fake_D_cVAE_1, fake_D_cVAE_2 = self.D_cVAE(fake_pair_cVAE.detach())

    # The loss for small patch & big patch
    loss_D_cVAE_1 = mse_loss(real_D_cVAE_1, target=1) + mse_loss(
        fake_D_cVAE_1, target=0)
    loss_D_cVAE_2 = mse_loss(real_D_cVAE_2, target=1) + mse_loss(
        fake_D_cVAE_2, target=0)
    self.writer.add_scalar('loss/loss_D_cVAE_1', loss_D_cVAE_1.item(),
                           global_step)
    self.writer.add_scalar('loss/loss_D_cVAE_2', loss_D_cVAE_2.item(),
                           global_step)

    # -----------------------------
    # Optimize D in cLR-GAN
    # -----------------------------
    # Generate fake image
    fake_img_cLR = self.G(cLR_data['A'], random_z)
    log_imgs = torchvision.utils.make_grid(fake_img_cLR)
    log_imgs = denormalize(log_imgs)
    self.writer.add_image('cLR_fake_random', log_imgs, global_step)

    real_pair_cLR = torch.cat([cLR_data['A'], cLR_data['B']], dim=1)
    # Pair the fake image with the input it was actually generated from.
    fake_pair_cLR = torch.cat([cLR_data['A'], fake_img_cLR], dim=1)

    real_D_cLR_1, real_D_cLR_2 = self.D_cLR(real_pair_cLR)
    fake_D_cLR_1, fake_D_cLR_2 = self.D_cLR(fake_pair_cLR.detach())

    # Loss for small patch & big patch
    loss_D_cLR_1 = mse_loss(real_D_cLR_1, target=1) + mse_loss(
        fake_D_cLR_1, target=0)
    loss_D_cLR_2 = mse_loss(real_D_cLR_2, target=1) + mse_loss(
        fake_D_cLR_2, target=0)
    self.writer.add_scalar('loss/loss_D_cLR_1', loss_D_cLR_1.item(),
                           global_step)
    self.writer.add_scalar('loss/loss_D_cLR_2', loss_D_cLR_2.item(),
                           global_step)

    loss_D = loss_D_cVAE_1 + loss_D_cVAE_2 + loss_D_cLR_1 + loss_D_cLR_2
    self.writer.add_scalar('loss/loss_D', loss_D.item(), global_step)

    # -----------------------------
    # Update D
    # -----------------------------
    self.all_zero_grad()
    loss_D.backward()
    self.optim_D_cVAE.step()
    self.optim_D_cLR.step()

    # ----------------------------------------------------------------
    # 2. Train G & E
    # ----------------------------------------------------------------
    # -----------------------------
    # GAN loss
    # -----------------------------
    # Generate the encoded latent vector again for the G/E graph
    mu, logvar = self.E(cVAE_data['B'])
    std = torch.exp(logvar / 2)
    random_z = sample_z(self.half_size, self.nz, 'gauss').to(self.device)
    encoded_z = (random_z * std) + mu

    # Generate fake image
    fake_img_cVAE = self.G(cVAE_data['A'], encoded_z)
    fake_pair_cVAE = torch.cat([cVAE_data['A'], fake_img_cVAE], dim=1)

    # Fool D_cVAE
    fake_D_cVAE_1, fake_D_cVAE_2 = self.D_cVAE(fake_pair_cVAE)

    # Loss for small patch & big patch
    loss_G_cVAE_1 = mse_loss(fake_D_cVAE_1, target=1)
    loss_G_cVAE_2 = mse_loss(fake_D_cVAE_2, target=1)

    # Sample a random latent vector and generate a fake image
    random_z = sample_z(self.half_size, self.nz, 'gauss').to(self.device)
    fake_img_cLR = self.G(cLR_data['A'], random_z)
    fake_pair_cLR = torch.cat([cLR_data['A'], fake_img_cLR], dim=1)

    # Fool D_cLR
    fake_D_cLR_1, fake_D_cLR_2 = self.D_cLR(fake_pair_cLR)

    # Loss for small patch & big patch
    loss_G_cLR_1 = mse_loss(fake_D_cLR_1, target=1)
    loss_G_cLR_2 = mse_loss(fake_D_cLR_2, target=1)

    loss_G = loss_G_cVAE_1 + loss_G_cVAE_2 + loss_G_cLR_1 + loss_G_cLR_2
    self.writer.add_scalar('loss/loss_G', loss_G.item(), global_step)

    # -----------------------------
    # KL-divergence (cVAE-GAN)
    # -----------------------------
    kl_div = torch.sum(
        0.5 * (mu**2 + torch.exp(logvar) - logvar - 1)) * self.lambda_kl
    self.writer.add_scalar('loss/kl_div', kl_div.item(), global_step)

    # -----------------------------
    # Reconstruction of image B (|G(A, z) - B|) (cVAE-GAN)
    # -----------------------------
    loss_img_recon = l1_loss(fake_img_cVAE, cVAE_data['B']) * self.lambda_img
    self.writer.add_scalar('loss/loss_img_recon', loss_img_recon.item(),
                           global_step)

    loss_E_G = loss_G + kl_div + loss_img_recon
    self.writer.add_scalar('loss/loss_E_G', loss_E_G.item(), global_step)

    # -----------------------------
    # Update E & G
    # -----------------------------
    self.all_zero_grad()
    loss_E_G.backward(retain_graph=True)
    self.optim_E.step()
    self.optim_G.step()

    # ----------------------------------------------------------------
    # 3. Train only G
    # ----------------------------------------------------------------
    # -----------------------------
    # Reconstruction of the random latent code (|E(G(A, z)) - z|) (cLR-GAN)
    # -----------------------------
    # This step should update only G.
    # See https://github.com/junyanz/BicycleGAN/issues/5 for details.
    mu, logvar = self.E(fake_img_cLR)
    loss_z_recon = l1_loss(mu, random_z) * self.lambda_z
    self.writer.add_scalar('loss/loss_z_recon', loss_z_recon.item(),
                           global_step)

    # -----------------------------
    # Update G
    # -----------------------------
    self.all_zero_grad()
    loss_z_recon.backward()
    self.optim_G.step()
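# mse_loss above follows the least-squares GAN (LSGAN) formulation. A minimal
# sketch of what it presumably does (an assumption; the real helper lives
# elsewhere in this repo):
import torch
import torch.nn.functional as F

def mse_loss(pred, target):
    # LSGAN loss: regress discriminator outputs toward the 0/1 target label.
    label = torch.full_like(pred, float(target))
    return F.mse_loss(pred, label)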
def main():
    args = parse_arg()
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)
    writer = SummaryWriter(os.path.join(args.save_dir, 'tb'))

    if args.use_cuda:
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    model = net.InPainting(args.use_cuda, mask_clipping=False).to(device)
    model_dis = net.Discriminator().to(device)
    model.train()
    model_dis.train()

    train_params = [
        p for (n, p) in model.named_parameters() if 'fine_painter' not in n
    ]
    optimizer = torch.optim.Adam(train_params, lr=args.lr)
    optimizer_dis = torch.optim.Adam(model_dis.parameters(), lr=args.lr)

    if args.resume:
        print('Resume training from ', args.resume)
        ckpt = torch.load(args.resume)
        try:
            model.load_state_dict(ckpt['ckpt'])
            model_dis.load_state_dict(ckpt['ckpt_dis'])
            optimizer.load_state_dict(ckpt['optim'])
            optimizer_dis.load_state_dict(ckpt['optim_dis'])
        except Exception:
            print(traceback.format_exc())
            print('Missing keys')
            model.load_state_dict(
                {k: v for k, v in ckpt['ckpt'].items() if 'encoder' in k},
                strict=False)

    if args.fix_mask:
        print('fix mask prediction')
        for p in model.encoder.parameters():
            p.requires_grad = False
    elif args.fix_recon:
        print('fix painter')
        for p in model.painter.parameters():
            p.requires_grad = False

    loss = torch.nn.L1Loss()
    loss_bce = torch.nn.BCELoss()
    ssim_window = ssim.create_window(11, 3).to(device)

    data = dataset.LargeScaleWatermarkDataset(
        folder_origin=os.path.join(args.path, args.path_origin),
        folder_watermarked=os.path.join(args.path, args.path_wm),
        anno_file=os.path.join(args.path, args.path_anno))
    train_loader = torch.utils.data.DataLoader(dataset=data,
                                               batch_size=args.batchsize,
                                               shuffle=True,
                                               num_workers=4)

    try:
        batch_per_epoch = len(train_loader)
        best_loss = 100
        for i in range(args.epochs):
            epoch_loss = 0
            print('Epoch %d' % i)
            for j, item in enumerate(train_loader):
                img_raw, img_wm, mask_wm = item
                img_raw, img_wm, mask_wm = img_raw.to(device), img_wm.to(
                    device), mask_wm.to(device)
                mask, recon = model(img_wm)

                # Bring in the discriminator
                if args.gan_method:
                    # Optimize D
                    dis_real, dis_recon = dis_forward(model_dis, img_raw,
                                                      recon.detach())
                    dis_wm = torch.sigmoid(model_dis(img_wm))
                    assert dis_recon.size() == dis_wm.size()
                    dis_fake = 0.5 * (dis_recon + dis_wm)
                    loss_disc = torch.mean(-1 * torch.log(1 - dis_fake) -
                                           torch.log(dis_real))
                    loss_gp = net.calc_gradient_penalty(
                        model_dis, img_raw, recon.detach())
                    loss_d = loss_gp + loss_disc

                    # Optimize G through D
                    dis_real, dis_recon = dis_forward(model_dis, img_raw,
                                                      recon)
                    loss_g = 0.001 * torch.mean(-1 * torch.log(dis_recon))

                loss_mask_reg = 0.1 * mask.clamp(0, 1).mean()
                try:
                    loss_mask = loss_bce(mask.clamp(0., 1.),
                                         mask_wm.float().clamp(0., 1.))
                except Exception:
                    import pdb
                    pdb.set_trace()
                    if not ((mask >= 0.) & (mask <= 1.)).all():
                        print('The error is in the generated mask')
                    if not ((mask_wm >= 0.) & (mask_wm <= 1.)).all():
                        print('The error is in the ground-truth watermark')

                loss_recon = loss(recon, img_raw)
                loss_ssim = 1 - ssim._ssim(0.5 * (1 + img_raw),
                                           0.5 * (1 + recon), ssim_window,
                                           11, 3, True)
                loss_weighted_recon = util.weighted_l1(recon, img_raw, mask)
                loss_ = loss_recon + loss_mask + loss_ssim
                if args.gan_method:
                    loss_ += loss_g
                    optimizer_dis.zero_grad()
                    loss_d.backward()
                    optimizer_dis.step()
                optimizer.zero_grad()
                loss_.backward()
                optimizer.step()
                epoch_loss += loss_.item()

                step = i * batch_per_epoch + j
                if j % 5 == 0:
                    writer.add_scalars(
                        'loss', {
                            'recon_l1': loss_recon.item(),
                            'ssim': loss_ssim.item(),
                            'exclusion': loss_mask.item(),
                            'mask_reg': loss_mask_reg.item()
                        }, step)
                if j % 10 == 0:
                    print(
                        'Loss: %.3f (recon: %.3f \t ssim: %.3f \t mask: %.3f \t)'
                        % (loss_.item(), loss_recon.item(), loss_ssim.item(),
                           loss_mask.item()))
                    if args.gan_method:
                        print('disc: %.3f \t gp: %.3f \t gen: %.3f' %
                              (loss_disc.item(), loss_gp.item(),
                               loss_g.item()))

                # Log the mask and the original image
                if j % 50 == 0:
                    writer.add_images('images', [
                        torch.cat(3 * [mask[0].float().to(device)]),
                        torch.cat(
                            3 * [mask_wm[0].float().to(device).unsqueeze(0)]),
                        util.denormalize(img_wm[0]),
                        util.denormalize(recon[0]).clamp(0, 1)
                    ],
                                      global_step=step,
                                      dataformats='CHW')

                # Plot the gradient distribution of each layer
                if j % 100 == 0:
                    writer.add_figure('grad_flow',
                                      util.plot_grad_flow_v2(
                                          model.named_parameters()),
                                      global_step=step)
                    if args.gan_method:
                        writer.add_figure('discriminator_grad',
                                          util.plot_grad_flow_v2(
                                              model_dis.named_parameters()),
                                          global_step=step)

            ckpt = {
                'ckpt': model.state_dict(),
                'optim': optimizer.state_dict()
            }
            torch.save(ckpt, os.path.join(args.save_dir, 'latest.pth'))
            # Keep a copy of the weights for every best-so-far epoch
            if epoch_loss / (j + 1) < best_loss:
                best_loss = epoch_loss / (j + 1)
                shutil.copy(
                    os.path.join(args.save_dir, 'latest.pth'),
                    os.path.join(args.save_dir, 'epoch_' + str(i) + '.pth'))
    except Exception as e:
        ckpt = {'ckpt': model.state_dict(), 'optim': optimizer.state_dict()}
        torch.save(ckpt, os.path.join(args.save_dir, 'latest.pth'))
        print('Save temporary checkpoints to %s' % args.save_dir)
        print(str(e), traceback.format_exc())
        sys.exit(0)

    print('Done training.')
    shutil.copyfile(os.path.join(args.save_dir, 'latest.pth'),
                    os.path.join(args.save_dir, 'epoch_%d.pth' % (i + 1)))
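# dis_forward is a helper defined elsewhere. Judging from how it is called
# above, a plausible sketch (an assumption, not the repo's actual code): run
# the discriminator on the real and reconstructed images and squash both
# outputs to (0, 1) so they can feed the log-based GAN loss.
import torch

def dis_forward(model_dis, img_real, img_fake):
    dis_real = torch.sigmoid(model_dis(img_real))
    dis_fake = torch.sigmoid(model_dis(img_fake))
    return dis_real, dis_fake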
def PredictMotion(self):
    print('Motion: ')
    P_m = ConditionalMotionNet()
    param = torch.load(self.model_path + '/PMNet_weight_' +
                       self.model_epoch + '.pth')
    P_m.load_state_dict(param)
    if self.gpu > -1:
        P_m.cuda(self.gpu)

    with open(self.model_path + '/codebook_m_' + self.model_epoch + '.pkl',
              'rb') as f:
        codebook_m = pickle.load(
            f) if sys.version_info[0] == 2 else pickle.load(
                f, encoding='latin1')

    # Linearly interpolate between the two nearest codebook entries.
    id1 = int(np.floor((len(codebook_m) - 1) * self.t_m))
    id2 = int(np.ceil((len(codebook_m) - 1) * self.t_m))
    z_weight = (len(codebook_m) - 1) * self.t_m - np.floor(
        (len(codebook_m) - 1) * self.t_m)
    z_m = (1. - z_weight
           ) * codebook_m[id1:id1 + 1] + z_weight * codebook_m[id2:id2 + 1]
    z_m = Variable(torch.from_numpy(z_m.astype(np.float32)))
    if self.gpu > -1:
        z_m = z_m.cuda(self.gpu)

    initial_coordinate = np.array([
        np.meshgrid(np.linspace(-1, 1, self.w + 2 * self.pad),
                    np.linspace(-1, 1, self.h + 2 * self.pad),
                    sparse=False)
    ]).astype(np.float32)
    initial_coordinate = Variable(torch.from_numpy(initial_coordinate))
    if self.gpu > -1:
        initial_coordinate = initial_coordinate.cuda(self.gpu)

    with torch.no_grad():
        test_img = cv2.imread(self.input_path)
        test_img = cv2.resize(test_img, (self.w, self.h))
        test_input = np.array([normalize(test_img)])
        test_input = Variable(
            torch.from_numpy(test_input.transpose(0, 3, 1, 2)))
        if self.gpu > -1:
            test_input = test_input.cuda(self.gpu)
        padded_test_input = F.pad(test_input,
                                  (self.pad, self.pad, self.pad, self.pad),
                                  mode='reflect')

        test_img_large = cv2.imread(self.input_path)
        if self.fw is None or self.fh is None:
            self.fh, self.fw = test_img_large.shape[:2]
        test_img_large = cv2.resize(test_img_large, (self.fw, self.fh))
        padded_test_input_large = np.array([normalize(test_img_large)])
        padded_test_input_large = Variable(
            torch.from_numpy(padded_test_input_large.transpose(0, 3, 1, 2)))
        if self.gpu > -1:
            padded_test_input_large = padded_test_input_large.cuda(self.gpu)
        scaled_pads = (int(self.pad * self.fh / float(self.h)),
                       int(self.pad * self.fw / float(self.w)))
        padded_test_input_large = F.pad(padded_test_input_large,
                                        (scaled_pads[1], scaled_pads[1],
                                         scaled_pads[0], scaled_pads[0]),
                                        mode='reflect')

        V_m = list()
        V_f = list()
        old_correspondence = None
        for t in range(self.TM):
            sys.stdout.write("\rProcessing frame %d, " % (t + 1))
            sys.stdout.flush()

            # Predict the flow field and rescale it to padded coordinates.
            flow = P_m(test_input, z_m)
            flow[:, 0, :, :] = flow[:, 0, :, :] * (
                self.w / float(self.pad * 2 + self.w))
            flow[:, 1, :, :] = flow[:, 1, :, :] * (
                self.h / float(self.pad * 2 + self.h))
            flow = F.pad(flow, (self.pad, self.pad, self.pad, self.pad),
                         mode='reflect')
            flow = self.s_m * flow

            # Chain this frame's flow with the accumulated correspondence.
            correspondence = initial_coordinate + flow
            if old_correspondence is not None:
                correspondence = F.grid_sample(
                    old_correspondence,
                    correspondence.permute(0, 2, 3, 1),
                    padding_mode='border')

            # Warp the full-resolution input with the upsampled correspondence.
            correspondence_large = F.upsample(
                correspondence,
                size=(self.fh + scaled_pads[0] * 2,
                      self.fw + scaled_pads[1] * 2),
                mode='bilinear',
                align_corners=True)
            y_large = F.grid_sample(padded_test_input_large,
                                    correspondence_large.permute(0, 2, 3, 1),
                                    padding_mode='border')
            outimg = y_large.data.cpu().numpy()[0].transpose(1, 2, 0)
            outimg = denormalize(outimg)
            outimg = outimg[scaled_pads[0]:outimg.shape[0] - scaled_pads[0],
                            scaled_pads[1]:outimg.shape[1] - scaled_pads[1]]
            V_m.append(outimg)

            # Visualize the flow field as an HSV image.
            outflowimg = flow.data.cpu().numpy()[0].transpose(1, 2, 0)
            outflowimg = outflowimg[self.pad:outflowimg.shape[0] - self.pad,
                                    self.pad:outflowimg.shape[1] - self.pad]
            mag, ang = cv2.cartToPolar(outflowimg[..., 1], outflowimg[..., 0])
            hsv = np.zeros_like(test_img)
            hsv[..., 1] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
            hsv[..., 0] = ang * 180 / np.pi / 2
            hsv[..., 2] = 255
            outflowimg = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
            outflowimg = cv2.resize(outflowimg, (self.fw, self.fh))
            V_f.append(outflowimg)

            # Warp the low-resolution input for the next iteration.
            y = F.grid_sample(padded_test_input,
                              correspondence.permute(0, 2, 3, 1),
                              padding_mode='border')
            test_input = y[:, :, self.pad:y.shape[2] - self.pad,
                           self.pad:y.shape[3] - self.pad]
            old_correspondence = correspondence

    V_mloop = generateLoop(V_m)
    return V_mloop, V_f
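# generateLoop is defined elsewhere. A common way to make a frame sequence
# loop seamlessly is to cross-fade the tail of the sequence back into its
# first frame; this sketch only illustrates that idea and is an assumption
# about what the actual helper does:
import numpy as np

def generate_loop_sketch(frames, blend_len=10):
    looped = list(frames)
    for k in range(blend_len):
        alpha = (k + 1) / float(blend_len + 1)
        # Blend the trailing frames toward the first frame.
        looped[-blend_len + k] = (
            (1.0 - alpha) * frames[-blend_len + k] +
            alpha * frames[0]).astype(frames[0].dtype)
    return looped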
def PredictAppearance(self, V_mloop):
    print('\nAppearance: ')
    minimum_loop_num = int(1 / self.s_a)

    P_a = ConditionalAppearanceNet(8)
    param = torch.load(self.model_path + '/PANet_weight_' +
                       self.model_epoch + '.pth')
    P_a.load_state_dict(param)
    if self.gpu > -1:
        P_a.cuda(self.gpu)

    E_a = define_E(3, 8, 64, which_model_netE='resnet_128', vaeLike=True)
    param = torch.load(self.model_path + '/EANet_weight_' +
                       self.model_epoch + '.pth')
    E_a.load_state_dict(param)
    if self.gpu > -1:
        E_a.cuda(self.gpu)

    with torch.no_grad():
        # Encode the first motion frame to get the starting latent code.
        interpolated_za_seq = list()
        input_conditional_test = cv2.resize(V_mloop[0], (128, 128))
        input_conditional_test = np.array(
            [normalize(input_conditional_test)])
        input_conditional_test = Variable(
            torch.from_numpy(input_conditional_test.transpose(0, 3, 1, 2)))
        if self.gpu > -1:
            input_conditional_test = input_conditional_test.cuda(self.gpu)
        za_input, _ = E_a(input_conditional_test)
        interpolated_za_seq.append(za_input.clone())

        with open(
                self.model_path + '/codebook_a_' + self.model_epoch + '.pkl',
                'rb') as f:
            codebook_a = pickle.load(
                f) if sys.version_info[0] == 2 else pickle.load(
                    f, encoding='latin1')
        za_seq = codebook_a[int((len(codebook_a) - 1) * self.t_a)]
        za_seq = [torch.from_numpy(np.array([za])) for za in za_seq]
        if self.gpu > -1:
            za_seq = [za.cuda(self.gpu) for za in za_seq]

        # Start from the codebook entry closest to the encoded latent code.
        start_fid = None
        min_dist = float('inf')
        for t, mu in enumerate(za_seq):
            dist = F.mse_loss(za_input, mu).cpu().numpy()
            if dist < min_dist:
                min_dist = dist
                start_fid = t

        TA = len(za_seq)
        loop_num = max(minimum_loop_num,
                       int(np.ceil(len(za_seq) / float(len(V_mloop)))))
        interpolation_size = int((loop_num * len(V_mloop) - TA) / TA)

        # Interpolate forward from the starting frame...
        za1 = za_input.clone()
        for t in range(start_fid + 1, TA):
            za2 = za_seq[t]
            for ti in range(interpolation_size):
                lambd = (ti + 1) / float(interpolation_size + 1)
                z = (1. - lambd) * za1 + lambd * za2
                interpolated_za_seq.append(z)
            interpolated_za_seq.append(za2)
            za1 = za2

        # ...and backward to the beginning of the sequence.
        za1 = za_input.clone()
        for t in range(start_fid - 1, -1, -1):
            za2 = za_seq[t]
            for ti in range(interpolation_size - 1, -1, -1):
                lambd = (ti + 1) / float(interpolation_size + 1)
                z = (1. - lambd) * za2 + lambd * za1
                interpolated_za_seq.insert(0, z)
            interpolated_za_seq.insert(0, za2)
            za1 = za2

        # Close the loop between the final and the starting code.
        loop_num = int(
            np.ceil(TA * (interpolation_size + 1) / float(len(V_mloop))))
        interpolation_size2 = int(interpolation_size +
                                  loop_num * len(V_mloop) -
                                  TA * (interpolation_size + 1))
        z_start = za_input.clone() if start_fid == 0 else za_seq[0]
        z_final = za_input.clone() if start_fid == TA - 1 else za_seq[-1]
        for ti in range(interpolation_size2):
            lambd = (ti + 1) / float(interpolation_size2 + 1)
            z = (1. - lambd) * z_final + lambd * z_start
            interpolated_za_seq.append(z)

        zaid = (interpolation_size + 1) * start_fid
        V = list()
        t = 0
        for loop in range(loop_num):
            for frame in V_mloop:
                sys.stdout.write("\rProcessing frame %d, " % (t + 1))
                sys.stdout.flush()
                t += 1

                test_input = cv2.resize(frame, (self.w, self.h))
                test_input = np.array([normalize(test_input)])
                test_input = Variable(
                    torch.from_numpy(test_input.transpose(0, 3, 1, 2)))
                if self.gpu > -1:
                    test_input = test_input.cuda(self.gpu)
                test_input_large = np.array([normalize(frame)])
                test_input_large = Variable(
                    torch.from_numpy(test_input_large.transpose(0, 3, 1, 2)))
                if self.gpu > -1:
                    test_input_large = test_input_large.cuda(self.gpu)

                # Predict per-pixel affine parameters at low resolution and
                # apply them to the full-resolution frame.
                z = interpolated_za_seq[zaid]
                y, al, bl = P_a(test_input, z)
                al_large = F.upsample(al,
                                      size=(self.fh, self.fw),
                                      mode='bilinear',
                                      align_corners=True)
                bl_large = F.upsample(bl,
                                      size=(self.fh, self.fw),
                                      mode='bilinear',
                                      align_corners=True)
                y = F.tanh(al_large * test_input_large + bl_large)
                outimg = y.data.cpu().numpy()[0].transpose(1, 2, 0)
                V.append(denormalize(outimg))

                zaid += 1
                if zaid > len(interpolated_za_seq) - 1:
                    zaid = 0
    return V
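# A minimal usage sketch for the two prediction steps above, assuming the
# class exposes PredictMotion/PredictAppearance as shown and that the output
# frames are BGR images; the output path, fps, and codec are illustrative:
import cv2

def render_video_sketch(predictor, out_path='out.mp4', fps=30):
    V_mloop, V_f = predictor.PredictMotion()
    V = predictor.PredictAppearance(V_mloop)
    h, w = V[0].shape[:2]
    writer = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*'mp4v'),
                             fps, (w, h))
    for frame in V:
        writer.write(frame.astype('uint8'))
    writer.release()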
kl_loss = -0.5 * K.sum(
    1 + z_log_var - K.square(z_mean) - K.square(K.exp(z_log_var)), axis=-1)
# Return the average loss over all images in the batch
vae_loss = K.mean(reconstruction_loss + kl_loss)

# Compile
reduce_lr = ReduceLROnPlateau(monitor='loss',
                              factor=0.2,
                              patience=5,
                              min_lr=0.001)
model.add_loss(vae_loss)
optimizer = optimizers.Adam(lr=learning_rate,
                            beta_1=0.9,
                            beta_2=0.999,
                            epsilon=None,
                            decay=0.0,
                            amsgrad=False)
model.compile(optimizer=optimizer)

# No target is needed: the VAE loss was attached via model.add_loss above.
history = model.fit(X_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    callbacks=[reduce_lr])

# Predicting
pred = model.predict(X[10000:15000], batch_size=20)
pred = denormalize(pred, X_max, X_min).flatten()
set_audio('{}_{}_epoch.wav'.format(name, pct), rate, pred)
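# z_mean and z_log_var come from the encoder defined earlier in this script.
# For reference, a sampling layer in this style of Keras VAE typically looks
# like the sketch below (an assumption; the actual layer is defined
# elsewhere). Note the KL term above squares K.exp(z_log_var), which suggests
# z_log_var here is log(sigma) rather than log(sigma^2):
from keras import backend as K
from keras.layers import Lambda

def sampling(args):
    z_mean, z_log_var = args
    # Reparameterization trick: z = mu + sigma * eps, eps ~ N(0, 1).
    eps = K.random_normal(shape=K.shape(z_mean))
    return z_mean + K.exp(z_log_var) * eps  # treating z_log_var as log(sigma)

# Usage: z = Lambda(sampling)([z_mean, z_log_var])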