def main():
    # Check if arguments are okay
    error = check()
    if error:
        print(error)
        exit(1)

    # Create extraction folder and extract frames
    IS_WINDOWS = 'Windows' == platform.system()
    extractionDir = "tmpSuperSloMo"
    if not IS_WINDOWS:
        # Assuming UNIX-like system where "." indicates hidden directories
        extractionDir = "." + extractionDir
    if os.path.isdir(extractionDir):
        rmtree(extractionDir)
    os.mkdir(extractionDir)
    if IS_WINDOWS:
        FILE_ATTRIBUTE_HIDDEN = 0x02
        # ctypes.windll only exists on Windows
        ctypes.windll.kernel32.SetFileAttributesW(extractionDir, FILE_ATTRIBUTE_HIDDEN)

    extractionPath = os.path.join(extractionDir, "input")
    outputPath = os.path.join(extractionDir, "output")
    os.mkdir(extractionPath)
    os.mkdir(outputPath)

    error = extract_frames(args.video, extractionPath)
    if error:
        print(error)
        exit(1)

    # Initialize transforms
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)
    mean = [0.429, 0.431, 0.397]
    std = [1, 1, 1]
    normalize = transforms.Normalize(mean=mean, std=std)
    negmean = [x * -1 for x in mean]
    revNormalize = transforms.Normalize(mean=negmean, std=std)

    # Temporary fix for issue #7 https://github.com/avinashpaliwal/Super-SloMo/issues/7 -
    # - Removed per channel mean subtraction for CPU.
    # Compare device.type: a torch.device does not reliably compare equal to the
    # string "cpu", so the original `device == "cpu"` never took the CPU branch.
    if device.type == "cpu":
        transform = transforms.Compose([transforms.ToTensor()])
        TP = transforms.Compose([transforms.ToPILImage()])
    else:
        transform = transforms.Compose([transforms.ToTensor(), normalize])
        TP = transforms.Compose([revNormalize, transforms.ToPILImage()])

    # Load data
    videoFrames = dataloader.Video(root=extractionPath, transform=transform)
    videoFramesloader = torch.utils.data.DataLoader(videoFrames, batch_size=args.batch_size, shuffle=False)

    # Initialize model
    flowComp = model.UNet(6, 4)
    flowComp.to(device)
    for param in flowComp.parameters():
        param.requires_grad = False
    ArbTimeFlowIntrp = model.UNet(20, 5)
    ArbTimeFlowIntrp.to(device)
    for param in ArbTimeFlowIntrp.parameters():
        param.requires_grad = False

    flowBackWarp = model.backWarp(videoFrames.dim[0], videoFrames.dim[1], device)
    flowBackWarp = flowBackWarp.to(device)

    dict1 = torch.load(args.checkpoint, map_location='cpu')
    ArbTimeFlowIntrp.load_state_dict(dict1['state_dictAT'])
    flowComp.load_state_dict(dict1['state_dictFC'])

    # Interpolate frames
    frameCounter = 1

    with torch.no_grad():
        for _, (frame0, frame1) in enumerate(tqdm(videoFramesloader), 0):
            I0 = frame0.to(device)
            I1 = frame1.to(device)

            flowOut = flowComp(torch.cat((I0, I1), dim=1))
            F_0_1 = flowOut[:, :2, :, :]
            F_1_0 = flowOut[:, 2:, :, :]

            # Save reference frames in output folder
            for batchIndex in range(args.batch_size):
                (TP(frame0[batchIndex].detach())).resize(videoFrames.origDim, Image.BILINEAR).save(
                    os.path.join(outputPath, str(frameCounter + args.sf * batchIndex) + ".jpg"))
            frameCounter += 1

            # Generate intermediate frames
            for intermediateIndex in range(1, args.sf):
                t = intermediateIndex / args.sf
                temp = -t * (1 - t)
                fCoeff = [temp, t * t, (1 - t) * (1 - t), temp]

                F_t_0 = fCoeff[0] * F_0_1 + fCoeff[1] * F_1_0
                F_t_1 = fCoeff[2] * F_0_1 + fCoeff[3] * F_1_0

                g_I0_F_t_0 = flowBackWarp(I0, F_t_0)
                g_I1_F_t_1 = flowBackWarp(I1, F_t_1)

                intrpOut = ArbTimeFlowIntrp(
                    torch.cat((I0, I1, F_0_1, F_1_0, F_t_1, F_t_0, g_I1_F_t_1, g_I0_F_t_0), dim=1))

                F_t_0_f = intrpOut[:, :2, :, :] + F_t_0
                F_t_1_f = intrpOut[:, 2:4, :, :] + F_t_1
                V_t_0 = torch.sigmoid(intrpOut[:, 4:5, :, :])  # F.sigmoid is deprecated
                V_t_1 = 1 - V_t_0

                g_I0_F_t_0_f = flowBackWarp(I0, F_t_0_f)
                g_I1_F_t_1_f = flowBackWarp(I1, F_t_1_f)

                wCoeff = [1 - t, t]

                Ft_p = (wCoeff[0] * V_t_0 * g_I0_F_t_0_f + wCoeff[1] * V_t_1 * g_I1_F_t_1_f) / (wCoeff[0] * V_t_0 + wCoeff[1] * V_t_1)

                # Save intermediate frame
                for batchIndex in range(args.batch_size):
                    (TP(Ft_p[batchIndex].cpu().detach())).resize(videoFrames.origDim, Image.BILINEAR).save(
                        os.path.join(outputPath, str(frameCounter + args.sf * batchIndex) + ".jpg"))
                frameCounter += 1

            # Set counter accounting for batching of frames
            frameCounter += args.sf * (args.batch_size - 1)

    # Generate video from interpolated frames
    create_video(outputPath)

    # Remove temporary files
    rmtree(extractionDir)

    exit(0)
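# The fCoeff list in the loop above encodes the Super SloMo intermediate-flow
# approximation: F_t->0 = -(1-t)*t*F_0->1 + t*t*F_1->0 and
# F_t->1 = (1-t)*(1-t)*F_0->1 - t*(1-t)*F_1->0. A minimal standalone sketch
# (hypothetical helper, not part of the original script) that prints the four
# coefficients exactly as the loop computes them:
def show_flow_coeffs(sf=4):
    for intermediateIndex in range(1, sf):
        t = intermediateIndex / sf
        temp = -t * (1 - t)
        # [F_0_1 and F_1_0 weights for F_t_0, then F_0_1 and F_1_0 weights for F_t_1]
        print(t, [temp, t * t, (1 - t) * (1 - t), temp])

# show_flow_coeffs(4) prints, e.g., 0.25 [-0.1875, 0.0625, 0.5625, -0.1875]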
def main():
    extractPath = "./video_interpolation"
    prepare_folders(extractPath)
    video_to_images(args.video, os.path.join(extractPath, "input"))

    # Initialize transforms
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    normalize = torchvision.transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[1, 1, 1])
    revNormalize = torchvision.transforms.Normalize(mean=[-0.5, -0.5, -0.5], std=[1, 1, 1])

    # Compare device.type so the CPU branch can actually trigger.
    if device.type == "cpu":
        transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])
        TP = torchvision.transforms.Compose([torchvision.transforms.ToPILImage()])
    else:
        transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor(), normalize])
        TP = torchvision.transforms.Compose([revNormalize, torchvision.transforms.ToPILImage()])

    # Load data
    videoFrames = dataloader.Video(root=os.path.join(extractPath, "input"), transform=transform)
    videoFramesloader = torch.utils.data.DataLoader(videoFrames, batch_size=2, shuffle=False)

    # Initialize model
    flowComp = model.UNet(6, 4)
    flowComp.to(device)
    for param in flowComp.parameters():
        param.requires_grad = False
    ArbTimeFlowIntrp = model.UNet(20, 5)
    ArbTimeFlowIntrp.to(device)
    for param in ArbTimeFlowIntrp.parameters():
        param.requires_grad = False

    flowBackWarp = model.backWarp(videoFrames.dim[0], videoFrames.dim[1], device)
    flowBackWarp = flowBackWarp.to(device)

    dict1 = torch.load("./checkpoints/Interpolation0.ckpt", map_location=device)
    ArbTimeFlowIntrp.load_state_dict(dict1['state_dictAT'])
    flowComp.load_state_dict(dict1['state_dictFC'])

    # Interpolate frames (this variant hardcodes batch_size = 2 and 2x slow motion)
    frameCounter = 0

    with torch.no_grad():
        for frameIndex, (frame0, frame1) in enumerate(tqdm(videoFramesloader), 0):
            I0 = frame0.to(device)
            I1 = frame1.to(device)

            flowOut = flowComp(torch.cat((I0, I1), dim=1))
            F_0_1 = flowOut[:, :2, :, :]
            F_1_0 = flowOut[:, 2:, :, :]

            # Save reference frames in output folder
            for batchIndex in range(2):
                (TP(frame0[batchIndex].detach())).resize(videoFrames.origDim, Image.BILINEAR).save(
                    os.path.join(extractPath, "output", "frame{:05d}.png".format(frameCounter + 2 * batchIndex)))
            frameCounter += 1

            # Generate the single intermediate frame at the temporal midpoint
            t = 0.5
            temp = -t * (1 - t)
            fCoeff = [temp, t * t, (1 - t) * (1 - t), temp]

            F_t_0 = fCoeff[0] * F_0_1 + fCoeff[1] * F_1_0
            F_t_1 = fCoeff[2] * F_0_1 + fCoeff[3] * F_1_0

            g_I0_F_t_0 = flowBackWarp(I0, F_t_0)
            g_I1_F_t_1 = flowBackWarp(I1, F_t_1)

            intrpOut = ArbTimeFlowIntrp(
                torch.cat((I0, I1, F_0_1, F_1_0, F_t_1, F_t_0, g_I1_F_t_1, g_I0_F_t_0), dim=1))

            F_t_0_f = intrpOut[:, :2, :, :] + F_t_0
            F_t_1_f = intrpOut[:, 2:4, :, :] + F_t_1
            V_t_0 = torch.sigmoid(intrpOut[:, 4:5, :, :])
            V_t_1 = 1 - V_t_0

            g_I0_F_t_0_f = flowBackWarp(I0, F_t_0_f)
            g_I1_F_t_1_f = flowBackWarp(I1, F_t_1_f)

            wCoeff = [1 - t, t]

            Ft_p = (wCoeff[0] * V_t_0 * g_I0_F_t_0_f + wCoeff[1] * V_t_1 * g_I1_F_t_1_f) / (wCoeff[0] * V_t_0 + wCoeff[1] * V_t_1)

            # Save intermediate frame
            for batchIndex in range(2):
                (TP(Ft_p[batchIndex].cpu().detach())).resize(videoFrames.origDim, Image.BILINEAR).save(
                    os.path.join(extractPath, "output", "frame{:05d}.png".format(frameCounter + 2 * batchIndex)))
            frameCounter += 1

            # Set counter accounting for batching of frames
            frameCounter += 2

    # Generate video from interpolated frames
    create_video(os.path.join(extractPath, "output"), os.path.join(extractPath, "output"))
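# prepare_folders, video_to_images, and create_video are referenced above but
# not shown in this variant. A minimal sketch of the first two, assuming ffmpeg
# is on PATH and that dataloader.Video reads the dumped frames in name order
# (hypothetical implementations, not the fork's own code):
import os
import shutil
import subprocess

def prepare_folders(extractPath):
    # Recreate a clean input/output layout under extractPath.
    if os.path.isdir(extractPath):
        shutil.rmtree(extractPath)
    os.makedirs(os.path.join(extractPath, "input"))
    os.makedirs(os.path.join(extractPath, "output"))

def video_to_images(video, inputPath):
    # Dump every frame of the source video as numbered PNGs.
    subprocess.run(["ffmpeg", "-i", video, os.path.join(inputPath, "frame%05d.png")], check=True)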
def main():
    # Check if arguments are okay
    error = check()
    if error:
        print(error)
        exit(1)

    # Create extraction folder and extract frames
    # Assuming UNIX-like system where "." indicates hidden directories
    extractionDir = ".tmpSuperSloMo"
    # This variant assumes the frames were already extracted on a previous run:
    # if os.path.isdir(extractionDir):
    #     rmtree(extractionDir)
    # os.mkdir(extractionDir)
    extractionPath = os.path.join(extractionDir, "input")
    outputPath = os.path.join(extractionDir, "output")
    # os.mkdir(extractionPath)
    # os.mkdir(outputPath)
    # error = extract_frames(args.video, extractionPath)
    # if error:
    #     print(error)
    #     exit(1)

    # Initialize transforms
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    mean = [0.429, 0.431, 0.397]
    std = [1, 1, 1]
    normalize = transforms.Normalize(mean=mean, std=std)
    negmean = [x * -1 for x in mean]
    revNormalize = transforms.Normalize(mean=negmean, std=std)

    # Temporary fix for issue #7 https://github.com/avinashpaliwal/Super-SloMo/issues/7 -
    # - Removed per channel mean subtraction for CPU.
    if device.type == "cpu":  # compare device.type, not the torch.device, to the string
        transform = transforms.Compose([transforms.ToTensor()])
        TP = transforms.Compose([transforms.ToPILImage()])
    else:
        transform = transforms.Compose([transforms.ToTensor(), normalize])
        TP = transforms.Compose([revNormalize, transforms.ToPILImage()])

    # Load data
    # Each dataset item is a (frame0, frame1) pair; e.g. videoFrames[0][0].size()
    # is torch.Size([3, 512, 960]).
    videoFrames = dataloader.Video(root=extractionPath, transform=transform)
    videoFramesloader = torch.utils.data.DataLoader(videoFrames, batch_size=1, shuffle=False)

    # Initialize model: UNet(inChannels, outChannels)
    # First U-Net: flow computation
    flowComp = model.UNet(6, 4)
    flowComp.to(device)
    for param in flowComp.parameters():
        param.requires_grad = False
    # Second U-Net: arbitrary-time flow interpolation
    ArbTimeFlowIntrp = model.UNet(20, 5)
    ArbTimeFlowIntrp.to(device)
    for param in ArbTimeFlowIntrp.parameters():
        param.requires_grad = False

    # videoFrames.dim is (width, height), e.g. (960, 512)
    flowBackWarp = model.backWarp(videoFrames.dim[0], videoFrames.dim[1], device)
    flowBackWarp = flowBackWarp.to(device)
    flowBackWarp = amp.initialize(flowBackWarp, opt_level="O1")

    # Checkpoint keys: 'Detail', 'epoch', 'timestamp', 'trainBatchSz', 'validationBatchSz',
    # 'learningRate', 'loss', 'valLoss', 'valPSNR', 'state_dictFC', 'state_dictAT'
    dict1 = torch.load(args.checkpoint, map_location='cpu')
    ArbTimeFlowIntrp.load_state_dict(dict1['state_dictAT'])
    flowComp.load_state_dict(dict1['state_dictFC'])

    # Wrap both networks with apex AMP for mixed-precision inference
    ArbTimeFlowIntrp = amp.initialize(ArbTimeFlowIntrp, opt_level="O1")
    flowComp = amp.initialize(flowComp, opt_level="O1")

    # Interpolate frames
    frameCounter = 1

    with torch.no_grad():
        for _, (frame0, frame1) in enumerate(tqdm(videoFramesloader), 0):
            I0 = frame0.to(device)  # torch.Size([1, 3, 512, 960])
            I1 = frame1.to(device)

            # torch.cat((I0, I1), dim=1).size() -> torch.Size([1, 6, 512, 960])
            flowOut = flowComp(torch.cat((I0, I1), dim=1))  # [1, 4, 512, 960]
            F_0_1 = flowOut[:, :2, :, :]  # [1, 2, 512, 960]
            F_1_0 = flowOut[:, 2:, :, :]  # [1, 2, 512, 960]

            # Save reference frames in output folder
            (TP(frame0[0].detach())).resize(videoFrames.origDim, Image.BILINEAR).save(
                os.path.join(outputPath, "{:06d}.png".format(frameCounter)))
            frameCounter += 1

            # Generate intermediate frames; with args.sf == 4, intermediateIndex is 1, 2, 3
            for intermediateIndex in range(1, args.sf):
                t = float(intermediateIndex) / args.sf
                temp = -t * (1 - t)
                fCoeff = [temp, t * t, (1 - t) * (1 - t), temp]
                # e.g. at t = 0.25: temp == -0.1875, fCoeff == [-0.1875, 0.0625, 0.5625, -0.1875]
                # (live pdb.set_trace() calls removed so the loop can run unattended)

                F_t_0 = fCoeff[0] * F_0_1 + fCoeff[1] * F_1_0
                F_t_1 = fCoeff[2] * F_0_1 + fCoeff[3] * F_1_0

                g_I0_F_t_0 = flowBackWarp(I0, F_t_0)
                g_I1_F_t_1 = flowBackWarp(I1, F_t_1)

                intrpOut = ArbTimeFlowIntrp(
                    torch.cat((I0, I1, F_0_1, F_1_0, F_t_1, F_t_0, g_I1_F_t_1, g_I0_F_t_0), dim=1))
                # intrpOut.size() -> torch.Size([1, 5, 512, 960])

                F_t_0_f = intrpOut[:, :2, :, :] + F_t_0
                F_t_1_f = intrpOut[:, 2:4, :, :] + F_t_1
                V_t_0 = torch.sigmoid(intrpOut[:, 4:5, :, :])
                V_t_1 = 1 - V_t_0

                g_I0_F_t_0_f = flowBackWarp(I0, F_t_0_f)
                g_I1_F_t_1_f = flowBackWarp(I1, F_t_1_f)

                wCoeff = [1 - t, t]

                Ft_p = (wCoeff[0] * V_t_0 * g_I0_F_t_0_f + wCoeff[1] * V_t_1 * g_I1_F_t_1_f) / (wCoeff[0] * V_t_0 + wCoeff[1] * V_t_1)

                # Free intermediates eagerly so large frames fit in GPU memory
                del g_I0_F_t_0_f, g_I1_F_t_1_f, F_t_0_f, F_t_1_f, F_t_0, F_t_1, intrpOut, V_t_0, V_t_1, wCoeff
                torch.cuda.empty_cache()

                # Save intermediate frame
                (TP(Ft_p[0].cpu().detach())).resize(videoFrames.origDim, Image.BILINEAR).save(
                    os.path.join(outputPath, "{:06d}.png".format(frameCounter)))
                del Ft_p
                torch.cuda.empty_cache()
                frameCounter += 1

            del F_0_1, F_1_0, flowOut, I0, I1, frame0, frame1
            torch.cuda.empty_cache()

    # Generate video from interpolated frames
    # create_video(outputPath)

    # Remove temporary files
    # rmtree(extractionDir)

    exit(0)
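# The variant above uses NVIDIA apex's amp.initialize (opt_level "O1") for
# mixed-precision inference. On PyTorch >= 1.6 the built-in autocast context
# covers the same use case without apex; a minimal sketch under that assumption
# (illustrative only, not the fork's own code):
def flow_forward_amp(flowComp, I0, I1):
    # Run the flow network with autocast so eligible ops execute in half precision.
    with torch.no_grad(), torch.cuda.amp.autocast():
        return flowComp(torch.cat((I0, I1), dim=1))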
def evaluate_frame_dir(extractionPath):
    # extractionDir is assumed to be defined at module level in this fork;
    # extractionPath holds the ground-truth frames being evaluated against.
    outputPath = os.path.join(extractionDir, "output")
    inputframe_dir = os.path.join(extractionDir, "inputframe")
    if op.exists(outputPath):
        rmtree(outputPath)
    if op.exists(inputframe_dir):
        rmtree(inputframe_dir)
    os.makedirs(outputPath, exist_ok=True)
    os.makedirs(inputframe_dir, exist_ok=True)

    # Keep every args.sf-th ground-truth frame as model input; the frames in
    # between are what the model must reconstruct.
    frames_gt = os.listdir(extractionPath)
    frames_gt.sort()
    print(frames_gt)
    for ind, i in enumerate(frames_gt):
        if ind % args.sf == 0:
            shutil.copyfile(os.path.join(extractionPath, i), os.path.join(inputframe_dir, i))

    video_time = time.time()

    # Initialize transforms
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    mean = [0.429, 0.431, 0.397]
    std = [1, 1, 1]
    normalize = transforms.Normalize(mean=mean, std=std)
    negmean = [x * -1 for x in mean]
    revNormalize = transforms.Normalize(mean=negmean, std=std)

    # Temporary fix for issue #7 https://github.com/avinashpaliwal/Super-SloMo/issues/7 -
    # - Removed per channel mean subtraction for CPU.
    if device.type == "cpu":
        # ToTensor conversion only; applied to every frame later on
        transform = transforms.Compose([transforms.ToTensor()])
        TP = transforms.Compose([transforms.ToPILImage()])
    else:
        transform = transforms.Compose([transforms.ToTensor(), normalize])
        TP = transforms.Compose([revNormalize, transforms.ToPILImage()])

    # Load data
    videoFrames = dataloader.Video(root=inputframe_dir, transform=transform)
    videoFramesloader = torch.utils.data.DataLoader(videoFrames, batch_size=args.batch_size, shuffle=False)

    # Initialize model
    # First U-Net: computes the optical flow
    flowComp = model.UNet(6, 4)
    flowComp.to(device)
    # Inference only, so disable gradients / backprop
    for param in flowComp.parameters():
        param.requires_grad = False
    # Second U-Net: synthesizes the interpolated frame
    ArbTimeFlowIntrp = model.UNet(20, 5)
    ArbTimeFlowIntrp.to(device)
    for param in ArbTimeFlowIntrp.parameters():
        param.requires_grad = False

    flowBackWarp = model.backWarp(videoFrames.dim[0], videoFrames.dim[1], device)
    flowBackWarp = flowBackWarp.to(device)

    # Load the model checkpoint
    dict1 = torch.load(args.checkpoint, map_location='cpu')
    ArbTimeFlowIntrp.load_state_dict(dict1['state_dictAT'])
    flowComp.load_state_dict(dict1['state_dictFC'])

    # Interpolate frames
    frameCounter = 0

    # tqdm is a fast, extensible progress bar: wrapping any iterator with
    # tqdm(iterator) adds progress reporting to long loops.
    with torch.no_grad():
        for _, (frame0, frame1) in enumerate(tqdm(videoFramesloader), 0):
            I0 = frame0.to(device)
            I1 = frame1.to(device)
            # print(I0.shape)

            # Implementation detail: the two frames are concatenated along dim 1
            flowOut = flowComp(torch.cat((I0, I1), dim=1))
            # Channels 0-1 of flowOut hold the 0->1 flow, channels 2-3 the 1->0 flow
            F_0_1 = flowOut[:, :2, :, :]
            F_1_0 = flowOut[:, 2:, :, :]

            # Save reference frames in output folder (disabled: the ground truth
            # already contains them; only the interpolated frames are scored)
            for batchIndex in range(args.batch_size):
                pass
                # (TP(frame0[batchIndex].detach())).resize(videoFrames.origDim, Image.BILINEAR).save(os.path.join(outputPath, str(frameCounter + args.sf * batchIndex) + ".jpg"))
            frameCounter += 1

            sttime = time.time()
            # Generate intermediate frames
            for intermediateIndex in range(1, args.sf):
                t = intermediateIndex / args.sf
                temp = -t * (1 - t)
                fCoeff = [temp, t * t, (1 - t) * (1 - t), temp]

                F_t_0 = fCoeff[0] * F_0_1 + fCoeff[1] * F_1_0
                F_t_1 = fCoeff[2] * F_0_1 + fCoeff[3] * F_1_0

                # Preliminary warps from the first U-Net's flow; compare these
                # against the refined result below
                g_I0_F_t_0 = flowBackWarp(I0, F_t_0)
                g_I1_F_t_1 = flowBackWarp(I1, F_t_1)

                # Concatenate everything above and feed it to the refinement network
                intrpOut = ArbTimeFlowIntrp(
                    torch.cat((I0, I1, F_0_1, F_1_0, F_t_1, F_t_0, g_I1_F_t_1, g_I0_F_t_0), dim=1))

                F_t_0_f = intrpOut[:, :2, :, :] + F_t_0
                F_t_1_f = intrpOut[:, 2:4, :, :] + F_t_1
                V_t_0 = torch.sigmoid(intrpOut[:, 4:5, :, :])  # F.sigmoid is deprecated
                V_t_1 = 1 - V_t_0

                g_I0_F_t_0_f = flowBackWarp(I0, F_t_0_f)
                g_I1_F_t_1_f = flowBackWarp(I1, F_t_1_f)

                wCoeff = [1 - t, t]

                # Note how the visibility masks and the time weights jointly blend
                # the two warped frames
                Ft_p = (wCoeff[0] * V_t_0 * g_I0_F_t_0_f + wCoeff[1] * V_t_1 * g_I1_F_t_1_f) / (wCoeff[0] * V_t_0 + wCoeff[1] * V_t_1)
                # print(Ft_p.shape)
                # Unrefined alternative, to inspect the intermediate result:
                # Ft_p = (wCoeff[0] * g_I0_F_t_0 + wCoeff[1] * g_I1_F_t_1)
                # Observation: temporally a bit jittery, but noticeably sharper;
                # probably down to the loss function.

                # Save intermediate frame, named after the ground-truth frame it replaces
                for batchIndex in range(args.batch_size):
                    # ttp = "%06d.jpg" % (frameCounter + args.sf * batchIndex)
                    ttp = frames_gt[frameCounter + args.sf * batchIndex]
                    ttp = os.path.join(outputPath, ttp)
                    # print(videoFrames.origDim)  # (480, 270)
                    (TP(Ft_p[batchIndex].cpu().detach())).save(ttp)
                frameCounter += 1

            print("run %d iters, time:%f ,average:%f s/iter" % (args.sf - 1, time.time() - sttime, (time.time() - sttime) / (args.sf - 1)))

            # Set counter accounting for batching of frames
            frameCounter += args.sf * (args.batch_size - 1)

    # Score the interpolated frames against the ground truth
    ssim_kep = []
    psnr_kep = []
    for i in os.listdir(outputPath):
        gt_img = cv2.imread(os.path.join(extractionPath, i))
        genimg = cv2.imread(os.path.join(outputPath, i))
        # scale > 0: target resolution is the ground-truth resolution times scale;
        # scale <= 0: use the generated image's resolution
        scale = 1
        if scale > 0:
            target_shape = (int(gt_img.shape[1] * scale), int(gt_img.shape[0] * scale))
        else:
            target_shape = (genimg.shape[1], genimg.shape[0])
        # print(genimg.shape)
        gt_img = cv2.resize(gt_img, target_shape)
        genimg = cv2.resize(genimg, target_shape)
        psnr = skimage.measure.compare_psnr(gt_img, genimg, 255)
        ssim = skimage.measure.compare_ssim(gt_img, genimg, multichannel=True)
        psnr_kep.append(psnr)
        ssim_kep.append(ssim)
    print("mean psnr:", np.mean(psnr_kep))
    print("mean ssim:", np.mean(ssim_kep))
    print("this video time used:", time.time() - video_time)

    # Generate video from interpolated frames
    # create_video(outputPath)

    # Remove temporary files
    rmtree(outputPath)
    rmtree(inputframe_dir)
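# skimage.measure.compare_psnr / compare_ssim, used above, were deprecated in
# scikit-image 0.16 and removed in 0.18, where the metrics live in
# skimage.metrics instead. A small compatibility shim (a sketch; newer
# scikit-image also prefers channel_axis=-1 over the multichannel flag):
try:
    from skimage.metrics import peak_signal_noise_ratio as compare_psnr
    from skimage.metrics import structural_similarity as compare_ssim
except ImportError:  # older scikit-image still ships skimage.measure.compare_*
    from skimage.measure import compare_psnr, compare_ssim

# psnr = compare_psnr(gt_img, genimg, data_range=255)
# ssim = compare_ssim(gt_img, genimg, multichannel=True)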
def main():
    # Check if arguments are okay
    error = check()
    if error:
        print(error)
        exit(1)

    # Create extraction folder and extract frames.
    # A folder is needed for the extracted frames; making it hidden is arguably
    # not worth the effort.
    IS_WINDOWS = 'Windows' == platform.system()
    extractionDir = "tmpSuperSloMo"
    if not IS_WINDOWS:
        # Assuming UNIX-like system where "." indicates hidden directories
        extractionDir = "." + extractionDir
    if os.path.isdir(extractionDir):
        rmtree(extractionDir)
    os.mkdir(extractionDir)
    if IS_WINDOWS:
        FILE_ATTRIBUTE_HIDDEN = 0x02
        # ctypes.windll only exists on Windows
        ctypes.windll.kernel32.SetFileAttributesW(extractionDir, FILE_ATTRIBUTE_HIDDEN)

    extractionPath = os.path.join(extractionDir, "input")
    outputPath = os.path.join(extractionDir, "output")
    os.mkdir(extractionPath)
    os.mkdir(outputPath)

    error = extract_frames(args.video, extractionPath)
    if error:
        print(error)
        exit(1)

    # Initialize transforms
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    mean = [0.429, 0.431, 0.397]
    std = [1, 1, 1]
    normalize = transforms.Normalize(mean=mean, std=std)
    negmean = [x * -1 for x in mean]
    revNormalize = transforms.Normalize(mean=negmean, std=std)

    # Temporary fix for issue #7 https://github.com/avinashpaliwal/Super-SloMo/issues/7 -
    # - Removed per channel mean subtraction for CPU.
    if device.type == "cpu":  # compare device.type, not the torch.device, to the string
        # ToTensor conversion only; applied to every frame later on
        transform = transforms.Compose([transforms.ToTensor()])
        TP = transforms.Compose([transforms.ToPILImage()])
    else:
        transform = transforms.Compose([transforms.ToTensor(), normalize])
        TP = transforms.Compose([revNormalize, transforms.ToPILImage()])

    # Load data
    videoFrames = dataloader.Video(root=extractionPath, transform=transform)
    videoFramesloader = torch.utils.data.DataLoader(videoFrames, batch_size=args.batch_size, shuffle=False)

    # Initialize model
    # First U-Net: computes the optical flow
    flowComp = model.UNet(6, 4)
    flowComp.to(device)
    # Inference only, so disable gradients / backprop
    for param in flowComp.parameters():
        param.requires_grad = False
    # Second U-Net: synthesizes the interpolated frame
    ArbTimeFlowIntrp = model.UNet(20, 5)
    ArbTimeFlowIntrp.to(device)
    for param in ArbTimeFlowIntrp.parameters():
        param.requires_grad = False

    flowBackWarp = model.backWarp(videoFrames.dim[0], videoFrames.dim[1], device)
    flowBackWarp = flowBackWarp.to(device)

    # Load the model checkpoint
    dict1 = torch.load(args.checkpoint, map_location='cpu')
    ArbTimeFlowIntrp.load_state_dict(dict1['state_dictAT'])
    flowComp.load_state_dict(dict1['state_dictFC'])

    # Interpolate frames
    frameCounter = 1

    # tqdm is a fast, extensible progress bar: wrapping any iterator with
    # tqdm(iterator) adds progress reporting to long loops.
    with torch.no_grad():
        for _, (frame0, frame1) in enumerate(tqdm(videoFramesloader), 0):
            I0 = frame0.to(device)
            I1 = frame1.to(device)

            # Implementation detail: the two frames are concatenated along dim 1
            flowOut = flowComp(torch.cat((I0, I1), dim=1))
            # Channels 0-1 of flowOut hold the 0->1 flow, channels 2-3 the 1->0 flow
            F_0_1 = flowOut[:, :2, :, :]
            F_1_0 = flowOut[:, 2:, :, :]

            # Save reference frames in output folder
            for batchIndex in range(args.batch_size):
                (TP(frame0[batchIndex].detach())).resize(videoFrames.origDim, Image.BILINEAR).save(
                    os.path.join(outputPath, str(frameCounter + args.sf * batchIndex) + ".jpg"))
            frameCounter += 1

            # Generate intermediate frames
            for intermediateIndex in range(1, args.sf):
                t = intermediateIndex / args.sf
                temp = -t * (1 - t)
                fCoeff = [temp, t * t, (1 - t) * (1 - t), temp]

                F_t_0 = fCoeff[0] * F_0_1 + fCoeff[1] * F_1_0
                F_t_1 = fCoeff[2] * F_0_1 + fCoeff[3] * F_1_0

                # Preliminary warps from the first U-Net's flow; compare these
                # against the refined result below
                g_I0_F_t_0 = flowBackWarp(I0, F_t_0)
                g_I1_F_t_1 = flowBackWarp(I1, F_t_1)

                # Concatenate everything above and feed it to the refinement network
                intrpOut = ArbTimeFlowIntrp(
                    torch.cat((I0, I1, F_0_1, F_1_0, F_t_1, F_t_0, g_I1_F_t_1, g_I0_F_t_0), dim=1))

                F_t_0_f = intrpOut[:, :2, :, :] + F_t_0
                F_t_1_f = intrpOut[:, 2:4, :, :] + F_t_1
                V_t_0 = torch.sigmoid(intrpOut[:, 4:5, :, :])  # F.sigmoid is deprecated
                V_t_1 = 1 - V_t_0

                g_I0_F_t_0_f = flowBackWarp(I0, F_t_0_f)
                g_I1_F_t_1_f = flowBackWarp(I1, F_t_1_f)

                wCoeff = [1 - t, t]

                # Note how the visibility masks and the time weights jointly blend
                # the two warped frames
                Ft_p = (wCoeff[0] * V_t_0 * g_I0_F_t_0_f + wCoeff[1] * V_t_1 * g_I1_F_t_1_f) / (wCoeff[0] * V_t_0 + wCoeff[1] * V_t_1)
                # Unrefined alternative, to inspect the intermediate result:
                # Ft_p = (wCoeff[0] * g_I0_F_t_0 + wCoeff[1] * g_I1_F_t_1)
                # Observation: temporally a bit jittery, but noticeably sharper;
                # probably down to the loss function.

                # Save intermediate frame
                for batchIndex in range(args.batch_size):
                    (TP(Ft_p[batchIndex].cpu().detach())).resize(videoFrames.origDim, Image.BILINEAR).save(
                        os.path.join(outputPath, str(frameCounter + args.sf * batchIndex) + ".jpg"))
                frameCounter += 1

            # Set counter accounting for batching of frames
            frameCounter += args.sf * (args.batch_size - 1)

    # Generate video from interpolated frames
    create_video(outputPath)

    # Remove temporary files
    rmtree(extractionDir)

    exit(0)
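# The final blend above weights each warped frame by its time coefficient and
# learned visibility map, then renormalizes:
#     Ft_p = ((1-t) * V_t_0 * g0 + t * V_t_1 * g1) / ((1-t) * V_t_0 + t * V_t_1)
# A tiny standalone check (hypothetical helper, random tensors) that pixels
# fully visible from frame 0 (V_t_0 == 1, hence V_t_1 == 0) reduce to the
# frame-0 warp alone:
def blend(g0, g1, V0, t):
    V1 = 1 - V0
    wCoeff = [1 - t, t]
    return (wCoeff[0] * V0 * g0 + wCoeff[1] * V1 * g1) / (wCoeff[0] * V0 + wCoeff[1] * V1)

# g0, g1 = torch.rand(1, 3, 4, 4), torch.rand(1, 3, 4, 4)
# assert torch.allclose(blend(g0, g1, torch.ones(1, 1, 4, 4), t=0.5), g0)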
def main():
    os.makedirs(args.output, exist_ok=True)
    outputPath = args.output

    if args.sf < 2:
        print("Slowmo factor must be at least 2.")
        return

    # Initialize transforms
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    mean = [0.429, 0.431, 0.397]
    std = [1, 1, 1]
    normalize = transforms.Normalize(mean=mean, std=std)
    negmean = [x * -1 for x in mean]
    revNormalize = transforms.Normalize(mean=negmean, std=std)

    # Temporary fix for issue #7 https://github.com/avinashpaliwal/Super-SloMo/issues/7 -
    # - Removed per channel mean subtraction for CPU.
    if device.type == "cpu":
        transform = transforms.Compose([transforms.ToTensor()])
        TP = transforms.Compose([transforms.ToPILImage()])
    else:
        transform = transforms.Compose([transforms.ToTensor(), normalize])
        TP = transforms.Compose([revNormalize, transforms.ToPILImage()])

    # Load data
    # NOTE: extractionPath (the directory of extracted input frames) is not
    # defined in this variant; it is presumably set at module level or was lost
    # in the fork's edits.
    videoFrames = dataloader.Video(root=extractionPath, transform=transform)
    videoFramesloader = torch.utils.data.DataLoader(videoFrames, batch_size=args.batch_size, shuffle=False)

    # Initialize model
    flowComp = model.UNet(6, 4)
    flowComp.to(device)
    for param in flowComp.parameters():
        param.requires_grad = False
    ArbTimeFlowIntrp = model.UNet(20, 5)
    ArbTimeFlowIntrp.to(device)
    for param in ArbTimeFlowIntrp.parameters():
        param.requires_grad = False

    flowBackWarp = model.backWarp(videoFrames.dim[0], videoFrames.dim[1], device)
    flowBackWarp = flowBackWarp.to(device)

    dict1 = torch.load(args.checkpoint, map_location='cpu')
    ArbTimeFlowIntrp.load_state_dict(dict1['state_dictAT'])
    flowComp.load_state_dict(dict1['state_dictFC'])

    # Interpolate frames
    frameCounter = 1

    with torch.no_grad():
        for _, (frame0, frame1) in enumerate(tqdm(videoFramesloader), 0):
            I0 = frame0.to(device)
            I1 = frame1.to(device)

            flowOut = flowComp(torch.cat((I0, I1), dim=1))
            F_0_1 = flowOut[:, :2, :, :]
            F_1_0 = flowOut[:, 2:, :, :]

            # Save reference frames in output folder
            for batchIndex in range(args.batch_size):
                (TP(frame0[batchIndex].detach())).resize(videoFrames.origDim, Image.BILINEAR).save(
                    os.path.join(outputPath, str(frameCounter + args.sf * batchIndex).zfill(8) + ".png"))
            frameCounter += 1

            # Generate intermediate frames
            for intermediateIndex in range(1, args.sf):
                t = float(intermediateIndex) / args.sf
                temp = -t * (1 - t)
                fCoeff = [temp, t * t, (1 - t) * (1 - t), temp]

                F_t_0 = fCoeff[0] * F_0_1 + fCoeff[1] * F_1_0
                F_t_1 = fCoeff[2] * F_0_1 + fCoeff[3] * F_1_0

                g_I0_F_t_0 = flowBackWarp(I0, F_t_0)
                g_I1_F_t_1 = flowBackWarp(I1, F_t_1)

                intrpOut = ArbTimeFlowIntrp(
                    torch.cat((I0, I1, F_0_1, F_1_0, F_t_1, F_t_0, g_I1_F_t_1, g_I0_F_t_0), dim=1))

                F_t_0_f = intrpOut[:, :2, :, :] + F_t_0
                F_t_1_f = intrpOut[:, 2:4, :, :] + F_t_1
                V_t_0 = torch.sigmoid(intrpOut[:, 4:5, :, :])
                V_t_1 = 1 - V_t_0

                g_I0_F_t_0_f = flowBackWarp(I0, F_t_0_f)
                g_I1_F_t_1_f = flowBackWarp(I1, F_t_1_f)

                wCoeff = [1 - t, t]

                Ft_p = (wCoeff[0] * V_t_0 * g_I0_F_t_0_f + wCoeff[1] * V_t_1 * g_I1_F_t_1_f) / (wCoeff[0] * V_t_0 + wCoeff[1] * V_t_1)

                # Save intermediate frame
                for batchIndex in range(args.batch_size):
                    (TP(Ft_p[batchIndex].cpu().detach())).resize(videoFrames.origDim, Image.BILINEAR).save(
                        os.path.join(outputPath, str(frameCounter + args.sf * batchIndex).zfill(8) + ".png"))
                frameCounter += 1

            # Set counter accounting for batching of frames
            frameCounter += args.sf * (args.batch_size - 1)

    exit(0)
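# Unlike the other variants, this one only writes the zero-padded PNG sequence
# ("%08d.png" thanks to zfill(8)) and never assembles a video. A hedged sketch
# of that missing step, assuming ffmpeg is on PATH; the fps parameter is a
# placeholder for whatever frame rate the fork intends:
import subprocess

def create_video_from_frames(outputPath, outFile, fps=30):
    # Stitch the numbered PNGs back into an H.264 video.
    subprocess.run(
        ["ffmpeg", "-r", str(fps), "-start_number", "1",
         "-i", os.path.join(outputPath, "%08d.png"),
         "-vcodec", "libx264", "-crf", "17", outFile],
        check=True)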