def process_landmarks(self, input_path, output_dir, seq_file_path):
    """Compute and cache face landmarks for every sequence video of an input.

    Does nothing when ``self.cache_landmarks`` is falsy. Otherwise, for each
    sequence listed in ``seq_file_path``, the matching sequence video in
    ``output_dir`` is run through ``self.L`` and ``self.heatmap_encoder`` and
    the results are written to a compressed ``.npz`` archive holding two
    arrays: ``landmarks`` (raw) and ``landmarks_smoothed``. Sequences whose
    landmarks file already exists are skipped.

    Args:
        input_path: Path of the original video; only its base name and
            extension are used to derive the per-sequence file names.
        output_dir: Directory that holds the per-sequence videos and
            receives the landmarks files.
        seq_file_path: Path of a pickle file containing the sequence list.
            Each entry must expose an ``id`` attribute. The file is
            unpickled, so it must come from a trusted source.
    """
    if not self.cache_landmarks:
        return

    path_stem, video_ext = os.path.splitext(input_path)
    base_name = os.path.basename(path_stem)

    # Read the pickled list of sequences
    with open(seq_file_path, "rb") as seq_file:
        sequences = pickle.load(seq_file)

    # Frame preprocessing: tensor conversion followed by per-channel normalization
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    frame_transforms = img_landmarks_transforms.Compose([ToTensor(), normalize])

    for sequence in sequences:
        vid_name = base_name + '_seq%02d%s' % (sequence.id, video_ext)
        vid_path = os.path.join(output_dir, vid_name)
        lms_path = os.path.splitext(vid_path)[0] + self.landmarks_postfix

        # Landmarks were already cached for this sequence
        if os.path.isfile(lms_path):
            continue

        print('=> Computing face landmarks for video: "%s"...' % vid_name)

        # Open the sequence video as an ordered stream of frame batches
        dataset = VideoInferenceDataset(vid_path, transform=frame_transforms)
        loader = DataLoader(dataset, batch_size=self.lms_batch_size,
                            num_workers=1, pin_memory=True,
                            drop_last=False, shuffle=False)

        # Per batch: move frames to the device, run self.L, then decode its
        # output into landmarks with self.heatmap_encoder
        progress = tqdm(loader, unit='batches', file=sys.stdout)
        per_batch = [
            self.heatmap_encoder(self.L(batch.to(self.device))).cpu().numpy()
            for batch in progress
        ]
        raw_landmarks = np.concatenate(per_batch)

        # Store both the raw and the temporally smoothed landmarks
        smoothed_landmarks = smooth_landmarks_98pts(raw_landmarks,
                                                    self.smooth_landmarks)
        np.savez_compressed(lms_path,
                            landmarks=raw_landmarks,
                            landmarks_smoothed=smoothed_landmarks)
def process_segmentation(self, input_path, output_dir, seq_file_path):
    """Compute and cache encoded face segmentations for every sequence video.

    Does nothing when ``self.cache_segmentation`` is falsy. Otherwise, for
    each sequence listed in ``seq_file_path``, the matching sequence video in
    ``output_dir`` is run through ``self.S``, the result is smoothed across
    batches with ``self.smooth_seg``, converted to one binary mask per frame,
    encoded with ``encode_binary_mask`` and pickled as a list to the
    segmentation file. Sequences whose segmentation file already exists are
    skipped before any other file is touched.

    When ``self.seg_remove_mouth`` is set, the ``landmarks_smoothed`` array of
    the sequence's landmarks file is loaded and ``remove_inner_mouth`` is
    applied to every mask, so the landmarks file must already exist for each
    sequence that still needs processing.

    Args:
        input_path: Path of the original video; only its base name and
            extension are used to derive the per-sequence file names.
        output_dir: Directory that holds the per-sequence videos (and
            landmarks files) and receives the segmentation files.
        seq_file_path: Path of a pickle file containing the sequence list.
            Each entry must expose an ``id`` attribute. The file is
            unpickled, so it must come from a trusted source.
    """
    if not self.cache_segmentation:
        return
    input_path_no_ext, input_ext = os.path.splitext(input_path)

    def encode_masks(segmentation, landmarks, encoded):
        """Append one encoded binary mask per frame of `segmentation` to `encoded`."""
        # A pixel is kept when index 1 wins the arg-max over dim 1
        # (presumably the face class - confirm against the model).
        # Copy the whole batch to host memory once rather than per frame.
        masks = (segmentation.argmax(1) == 1).cpu().numpy()
        for curr_mask in masks:
            if landmarks is not None:
                # Exactly one entry is appended per processed frame, so the
                # current length is the index of this frame in the sequence.
                curr_mask = remove_inner_mouth(curr_mask, landmarks[len(encoded)])
            encoded.append(encode_binary_mask(curr_mask))

    # Load sequences from file
    # NOTE: unpickling can execute arbitrary code; only use trusted files.
    with open(seq_file_path, "rb") as fp:  # Unpickling
        seq_list = pickle.load(fp)

    # Initialize transforms
    img_transforms = img_landmarks_transforms.Compose([
        ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ])

    # For each sequence
    for seq in seq_list:
        curr_vid_name = os.path.basename(input_path_no_ext) + '_seq%02d%s' % (seq.id, input_ext)
        curr_vid_path = os.path.join(output_dir, curr_vid_name)
        curr_path_no_ext = os.path.splitext(curr_vid_path)[0]
        curr_seg_path = curr_path_no_ext + self.segmentation_postfix

        # Check the cache first, so that an already processed sequence neither
        # pays for loading its landmarks nor fails when they are missing.
        if os.path.isfile(curr_seg_path):
            continue

        landmarks = None
        if self.seg_remove_mouth:
            curr_lms_path = curr_path_no_ext + self.landmarks_postfix
            # Close the archive right after reading instead of leaking the handle
            with np.load(curr_lms_path) as lms_file:
                landmarks = lms_file['landmarks_smoothed']

        print('=> Computing face segmentation for video: "%s"...' % curr_vid_name)

        # Initialize input video
        in_vid = VideoInferenceDataset(curr_vid_path, transform=img_transforms)
        in_vid_loader = DataLoader(in_vid, batch_size=self.seg_batch_size, num_workers=1,
                                   pin_memory=True, drop_last=False, shuffle=False)

        # For each batch of frames in the input video
        pbar = tqdm(in_vid_loader, unit='batches')
        prev_segmentation = None
        r = self.smooth_seg.kernel_radius
        encoded_segmentations = []
        # This initialization is only relevant if there is a leftover from last batch
        pad_prev, pad_next = r, r
        for frame in pbar:
            frame = frame.to(self.device)

            # Compute segmentation, prefixed with the frames carried over
            # from the previous batch (if any)
            raw_segmentation = self.S(frame)
            if prev_segmentation is None:
                segmentation = raw_segmentation
            else:
                segmentation = torch.cat((prev_segmentation, raw_segmentation), dim=0)

            # Smooth only when there are more frames than the kernel radius
            if segmentation.shape[0] > r:
                # Pad the start only for the first batch; pad the end only
                # when this batch is shorter than a full one.
                pad_prev = r if prev_segmentation is None else 0
                pad_next = min(r, self.seg_batch_size - frame.shape[0])
                segmentation = self.smooth_seg(segmentation, pad_prev=pad_prev, pad_next=pad_next)

            # Note: the pad_next value here is only relevant if there is a
            # leftover from last batch
            prev_segmentation = raw_segmentation[-(r * 2 - pad_next):]

            # Encode segmentation
            encode_masks(segmentation, landmarks, encoded_segmentations)

        # Final iteration if we have leftover unsmoothed segmentations from the last batch
        if pad_next < r:
            segmentation = self.smooth_seg(prev_segmentation, pad_prev=pad_prev, pad_next=r)
            encode_masks(segmentation, landmarks, encoded_segmentations)

        # Write to file
        with open(curr_seg_path, "wb") as fp:  # Pickling
            pickle.dump(encoded_segmentations, fp)