Example #1
    def process_landmarks(self, input_path, output_dir, seq_file_path):
        if not self.cache_landmarks:
            return
        input_path_no_ext, input_ext = os.path.splitext(input_path)

        # Load sequences from file
        with open(seq_file_path, "rb") as fp:  # Unpickling
            seq_list = pickle.load(fp)

        # Initialize transforms
        img_transforms = img_landmarks_transforms.Compose([
            ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

        # For each sequence
        for seq in seq_list:
            curr_vid_name = os.path.basename(
                input_path_no_ext) + '_seq%02d%s' % (seq.id, input_ext)
            curr_vid_path = os.path.join(output_dir, curr_vid_name)
            curr_lms_path = os.path.splitext(
                curr_vid_path)[0] + self.landmarks_postfix

            if os.path.isfile(curr_lms_path):
                continue
            print('=> Computing face landmarks for video: "%s"...' %
                  curr_vid_name)

            # Initialize input video
            in_vid = VideoInferenceDataset(curr_vid_path,
                                           transform=img_transforms)
            in_vid_loader = DataLoader(in_vid,
                                       batch_size=self.lms_batch_size,
                                       num_workers=1,
                                       pin_memory=True,
                                       drop_last=False,
                                       shuffle=False)

            # For each batch of frames in the input video
            seq_landmarks = []
            for i, frame in enumerate(
                    tqdm(in_vid_loader, unit='batches', file=sys.stdout)):
                frame = frame.to(self.device)
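                # self.L presumably produces per-landmark heatmaps; heatmap_encoder decodes them to 2D points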
                H = self.L(frame)
                landmarks = self.heatmap_encoder(H)
                seq_landmarks.append(landmarks.cpu().numpy())
            seq_landmarks = np.concatenate(seq_landmarks)

            # Save landmarks to file
            seq_landmarks_smoothed = smooth_landmarks_98pts(
                seq_landmarks, self.smooth_landmarks)
            np.savez_compressed(curr_lms_path,
                                landmarks=seq_landmarks,
                                landmarks_smoothed=seq_landmarks_smoothed)
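
A note on the assumed pieces in this example: self.L is expected to output per-landmark heatmaps for each frame, and self.heatmap_encoder to decode those heatmaps into 98 (x, y) points that smooth_landmarks_98pts can then smooth over time. One common way to implement such a decoder is a soft-argmax over each heatmap. The sketch below only illustrates that idea under this assumption; it is not the actual heatmap_encoder used above.

import torch
import torch.nn as nn

class SoftArgmaxHeatmapEncoder(nn.Module):
    """Illustrative decoder: heatmaps (B, P, H, W) -> landmark coordinates (B, P, 2)."""
    def forward(self, heatmaps):
        b, p, h, w = heatmaps.shape
        # Softmax over the flattened spatial dimensions so each heatmap sums to 1
        probs = torch.softmax(heatmaps.reshape(b, p, -1), dim=-1).reshape(b, p, h, w)
        xs = torch.arange(w, dtype=probs.dtype, device=probs.device)
        ys = torch.arange(h, dtype=probs.dtype, device=probs.device)
        # Expected x / y positions under each heatmap's probability distribution
        x = (probs.sum(dim=2) * xs).sum(dim=-1)  # marginalize over rows, weight by column index
        y = (probs.sum(dim=3) * ys).sum(dim=-1)  # marginalize over columns, weight by row index
        return torch.stack((x, y), dim=-1)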
Example #2
    def process_segmentation(self, input_path, output_dir, seq_file_path):
        if not self.cache_segmentation:
            return
        input_path_no_ext, input_ext = os.path.splitext(input_path)

        # Load sequences from file
        with open(seq_file_path, "rb") as fp:  # Unpickling
            seq_list = pickle.load(fp)

        # Initialize transforms
        img_transforms = img_landmarks_transforms.Compose([
            ToTensor(),
            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
        ])

        # For each sequence
        for seq in seq_list:
            curr_vid_name = os.path.basename(
                input_path_no_ext) + '_seq%02d%s' % (seq.id, input_ext)
            curr_vid_path = os.path.join(output_dir, curr_vid_name)
            curr_seg_path = os.path.splitext(
                curr_vid_path)[0] + self.segmentation_postfix

            if os.path.isfile(curr_seg_path):
                continue
            print('=> Computing face segmentation for video: "%s"...' %
                  curr_vid_name)

            # Load the smoothed landmarks if the inner mouth should be removed from the masks
            if self.seg_remove_mouth:
                curr_lms_path = os.path.splitext(
                    curr_vid_path)[0] + self.landmarks_postfix
                landmarks = np.load(curr_lms_path)['landmarks_smoothed']
                frame_count = 0

            # Initialize input video
            in_vid = VideoInferenceDataset(curr_vid_path,
                                           transform=img_transforms)
            in_vid_loader = DataLoader(in_vid,
                                       batch_size=self.seg_batch_size,
                                       num_workers=1,
                                       pin_memory=True,
                                       drop_last=False,
                                       shuffle=False)

            # For each batch of frames in the input video
            pbar = tqdm(in_vid_loader, unit='batches')
            prev_segmentation = None
            r = self.smooth_seg.kernel_radius
            encoded_segmentations = []
            pad_prev, pad_next = r, r  # These defaults are only used by the leftover pass after the loop (if the last batch leaves unsmoothed frames)
            for i, frame in enumerate(pbar):
                frame = frame.to(self.device)

                # Compute segmentation
                raw_segmentation = self.S(frame)
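                # Prepend the tail of the previous batch so the temporal smoothing
                # kernel (radius r) has context across batch boundaries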
                segmentation = torch.cat((prev_segmentation, raw_segmentation), dim=0) \
                    if prev_segmentation is not None else raw_segmentation
                if segmentation.shape[0] > r:
                    pad_prev = r if prev_segmentation is None else 0
                    pad_next = min(r, self.seg_batch_size - frame.shape[0])
                    segmentation = self.smooth_seg(segmentation,
                                                   pad_prev=pad_prev,
                                                   pad_next=pad_next)

                    # Note: the pad_next value here is only relevant if there is a leftover from the last batch
                    prev_segmentation = raw_segmentation[-(r * 2 - pad_next):]

                mask = segmentation.argmax(1) == 1

                # Encode segmentation
                for b in range(mask.shape[0]):
                    curr_mask = mask[b].cpu().numpy()
                    if self.seg_remove_mouth:
                        curr_mask = remove_inner_mouth(curr_mask,
                                                       landmarks[frame_count])
                        frame_count += 1
                    encoded_segmentations.append(encode_binary_mask(curr_mask))

            # Final iteration if we have leftover unsmoothed segmentations from the last batch
            if pad_next < r:
                # Compute segmentation
                segmentation = self.smooth_seg(prev_segmentation,
                                               pad_prev=pad_prev,
                                               pad_next=r)
                mask = segmentation.argmax(1) == 1

                # Encode segmentation
                for b in range(mask.shape[0]):
                    curr_mask = mask[b].cpu().numpy()
                    if self.seg_remove_mouth:
                        curr_mask = remove_inner_mouth(curr_mask,
                                                       landmarks[frame_count])
                        frame_count += 1
                    encoded_segmentations.append(encode_binary_mask(curr_mask))

            # Write to file
            with open(curr_seg_path, "wb") as fp:  # Pickling
                pickle.dump(encoded_segmentations, fp)
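
encode_binary_mask and remove_inner_mouth are repository helpers that are not shown in this example. To give a concrete picture of how a per-frame binary mask could be stored compactly before pickling, here is a minimal run-length-encoding sketch; it is an assumed stand-in, not the encoder actually called above.

import numpy as np

def rle_encode_mask(mask):
    """Illustrative run-length encoding: boolean mask (H, W) -> (shape, starts, lengths)."""
    m = np.asarray(mask, dtype=bool)
    flat = m.ravel()
    # Pad with False on both ends so every run has a detectable start and end
    padded = np.concatenate(([False], flat, [False]))
    changes = np.flatnonzero(padded[1:] != padded[:-1])
    starts, ends = changes[0::2], changes[1::2]
    return m.shape, starts, ends - starts

def rle_decode_mask(shape, starts, lengths):
    """Inverse of rle_encode_mask."""
    flat = np.zeros(int(np.prod(shape)), dtype=bool)
    for s, l in zip(starts, lengths):
        flat[s:s + l] = True
    return flat.reshape(shape)

# Round trip: encoded = rle_encode_mask(curr_mask); rle_decode_mask(*encoded) recovers curr_mask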