Example #1
    def __call__(self, img, landmarks, bbox):
        img = np.array(img).copy()
        if self.align:
            img, landmarks = align_crop(img, landmarks, bbox, self.bbox_scale,
                                        self.bbox_square)
        else:
            bbox_scaled = scale_bbox(bbox, self.bbox_scale, self.bbox_square)
            img, landmarks = crop_img(img, landmarks, bbox_scaled)

        img = Image.fromarray(img)

        return img, landmarks, bbox
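None of the snippets on this page define scale_bbox itself. Judging from the call sites (a [left, top, width, height] box plus scale and square arguments, with defaults of 2.0 and True visible in Example #7), a minimal sketch could look like this; the original may round or clamp differently:

import numpy as np

def scale_bbox(bbox, scale=2.0, square=True):
    """Scale a [left, top, width, height] box about its center (sketch)."""
    bbox = np.asarray(bbox, dtype=float)
    center = bbox[:2] + bbox[2:] / 2.0
    size = bbox[2:] * scale
    if square:
        size[:] = size.max()  # grow the short side so the box becomes square
    return np.round(np.concatenate((center - size / 2.0, size))).astype(int)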
Example #2
def main(input_path,
         output_dir=None,
         cache_path=None,
         seq_postfix='_dsfd_seq.pkl',
         out_postfix='.jpg',
         resolution=256,
         crop_scale=1.2):
    cache_path = os.path.splitext(
        input_path)[0] + seq_postfix if cache_path is None else cache_path
    if output_dir is None:
        output_dir = os.path.splitext(input_path)[0]
        if not os.path.isdir(output_dir):
            os.mkdir(output_dir)

    # Verification
    if not os.path.isfile(input_path):
        raise RuntimeError('Input video does not exist: ' + input_path)
    if not os.path.isfile(cache_path):
        raise RuntimeError('Cache file does not exist: ' + cache_path)
    if not os.path.isdir(output_dir):
        raise RuntimeError('Output directory does not exist: ' + output_dir)

    print('=> Cropping image sequences from image: "%s"...' %
          os.path.basename(input_path))

    # Load sequences from file
    with open(cache_path, "rb") as fp:  # Unpickling
        seq_list = pickle.load(fp)

    # Read image from file
    img = cv2.imread(input_path)
    if img is None:
        raise RuntimeError('Failed to read image: ' + input_path)

    # For each sequence
    for s, seq in enumerate(seq_list):
        det = seq[0]

        # Crop image
        bbox = np.concatenate((det[:2], det[2:] - det[:2]))
        bbox = scale_bbox(bbox, crop_scale)
        img_cropped = crop_img(img, bbox)
        img_cropped = cv2.resize(img_cropped, (resolution, resolution),
                                 interpolation=cv2.INTER_CUBIC)

        # Write cropped image to file
        out_img_name = os.path.splitext(os.path.basename(
            input_path))[0] + '_seq%02d%s' % (seq.id, out_postfix)
        out_img_path = os.path.join(output_dir, out_img_name)
        cv2.imwrite(out_img_path, img_cropped)
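crop_img is also used without a definition, and its signature varies across the examples (image-only here, image-plus-landmarks in Example #7, extra border/value arguments in Example #3). A rough sketch of the image-only variant, assuming out-of-frame regions are padded via cv2.copyMakeBorder, could be:

import cv2
import numpy as np

def crop_img(img, bbox, border=cv2.BORDER_CONSTANT, value=None):
    """Crop a [left, top, width, height] box, padding where it leaves the frame (sketch)."""
    left, top, width, height = [int(v) for v in bbox]
    right, bottom = left + width, top + height
    # Overhang past each image edge (the same arithmetic appears in crop2img, Example #4)
    pad_left, pad_top = max(-left, 0), max(-top, 0)
    pad_right = max(right - img.shape[1], 0)
    pad_bottom = max(bottom - img.shape[0], 0)
    crop = img[max(top, 0):bottom, max(left, 0):right]
    if pad_left or pad_top or pad_right or pad_bottom:
        crop = cv2.copyMakeBorder(crop, pad_top, pad_bottom, pad_left, pad_right,
                                  border, value=0 if value is None else value)
    return crop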
Example #3
    def __call__(self, x):
        """
        Args:
            x (numpy.ndarray or list of numpy.ndarray): Image (H x W x C) or pose (3) or bounding box (4)

        Returns:
            numpy.ndarray or list of numpy.ndarray: Transformed images or poses
        """
        if isinstance(x, (list, tuple)):
            if len(x) == 2 and is_img(x[0]) and is_bbox(x[1]):
                # Found image and bounding box pair
                img, bbox = x
                if self.det_format:
                    bbox = np.concatenate((bbox[:2], bbox[2:] - bbox[:2]))
                bbox_scaled = scale_bbox(bbox, self.bbox_scale, self.bbox_square)
                return crop_img(img, bbox_scaled, border=self.border_id, value=self.value)
            else:
                return [self.__call__(a) for a in x]

        return x
Example #4
def crop2img(img, crop, bbox):
    scaled_bbox = scale_bbox(bbox)
    # cv2.resize takes dsize as (width, height); the bbox is [x, y, w, h]
    scaled_crop = cv2.resize(crop, (scaled_bbox[2], scaled_bbox[3]),
                             interpolation=cv2.INTER_CUBIC)
    left = -scaled_bbox[0] if scaled_bbox[0] < 0 else 0
    top = -scaled_bbox[1] if scaled_bbox[1] < 0 else 0
    right = scaled_bbox[0] + scaled_bbox[2] - img.shape[1] if (
        scaled_bbox[0] + scaled_bbox[2] - img.shape[1]) > 0 else 0
    bottom = scaled_bbox[1] + scaled_bbox[3] - img.shape[0] if (
        scaled_bbox[1] + scaled_bbox[3] - img.shape[0]) > 0 else 0
    crop_bbox = np.array([
        left, top, scaled_bbox[2] - left - right, scaled_bbox[3] - top - bottom
    ])
    scaled_bbox += np.array([left, top, -left - right, -top - bottom])

    out_img = img.copy()
    out_img[scaled_bbox[1]:scaled_bbox[1] + scaled_bbox[3], scaled_bbox[0]:scaled_bbox[0] + scaled_bbox[2]] = \
        scaled_crop[crop_bbox[1]:crop_bbox[1] + crop_bbox[3], crop_bbox[0]:crop_bbox[0] + crop_bbox[2]]

    return out_img
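The four overhang terms above clamp the paste region to the frame. A small worked example with hypothetical numbers (plain numpy) shows the values they take when a scaled box sticks out past the top-left corner:

import numpy as np

img_h, img_w = 480, 640                        # frame size (hypothetical)
scaled_bbox = np.array([-20, -10, 100, 100])   # x, y, w, h after scaling

left = max(-scaled_bbox[0], 0)                           # 20 px off the left edge
top = max(-scaled_bbox[1], 0)                            # 10 px off the top edge
right = max(scaled_bbox[0] + scaled_bbox[2] - img_w, 0)  # 0, fits horizontally
bottom = max(scaled_bbox[1] + scaled_bbox[3] - img_h, 0) # 0, fits vertically

crop_bbox = np.array([left, top,
                      scaled_bbox[2] - left - right,
                      scaled_bbox[3] - top - bottom])
print(crop_bbox)  # [20 10 80 90]: only the in-frame part of the crop is pasted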
Example #5
    def _write_batch(self, tensors):
        batch_size = tensors[0].shape[0]

        # For each frame in the current batch of tensors
        for b in range(batch_size):
            # Handle full frames if output_crop was not specified
            full_frame_bgr, bbox = None, None
            if self._verbose == 0 and not self._output_crop:
                # Read frame from input video
                ret, full_frame_bgr = self._in_vid.read()
                assert full_frame_bgr is not None, \
                    f'Failed to read frame {self._frame_count} from input video: "{self._in_vid_path}"'

                # Get bounding box from sequence
                det = self._seq[self._frame_count - self._seq.start_index]
                bbox = np.concatenate((det[:2], det[2:] - det[:2]))
                bbox = scale_bbox(bbox, self._crop_scale)

            render_bgr = self.on_render(*[t[b] for t in tensors])
            self._render(render_bgr, full_frame_bgr, bbox)
            self._frame_count += 1
Example #6
    def process(self, img_list, bbox_list, landmarks_list=None):
        # For each input image and corresponding landmarks
        for i in range(len(img_list)):
            if isinstance(img_list[i], (list, tuple)):
                if landmarks_list is None:
                    img_list[i], _ = self.process(img_list[i], bbox_list[i])
                else:
                    img_list[i], landmarks_list[i] = self.process(img_list[i], bbox_list[i], landmarks_list[i])
            else:
                if self.det_format:
                    bbox = np.concatenate((bbox_list[i][:2], bbox_list[i][2:] - bbox_list[i][:2]))
                else:
                    bbox = bbox_list[i]
                bbox_scaled = scale_bbox(bbox, self.bbox_scale, self.bbox_square)
                if landmarks_list is None:
                    img_list[i] = crop_img(img_list[i], bbox_scaled, border=self.border, value=self.value)
                else:
                    img_list[i], landmarks_list[i] = crop_img(img_list[i], bbox_scaled, landmarks_list[i], self.border,
                                                              self.value)

        return img_list, landmarks_list
Example #7
def align_crop(img, landmarks, bbox, scale=2.0, square=True):
    right_eye_center = landmarks[36:42, :].mean(axis=0)
    left_eye_center = landmarks[42:48, :].mean(axis=0)

    eye_center = (right_eye_center + left_eye_center) / 2.0
    dy = right_eye_center[1] - left_eye_center[1]
    dx = right_eye_center[0] - left_eye_center[0]
    angle = np.degrees(np.arctan2(dy, dx)) - 180

    M = cv2.getRotationMatrix2D(tuple(eye_center), angle, 1.)
    output = cv2.warpAffine(img,
                            M, (img.shape[1], img.shape[0]),
                            flags=cv2.INTER_CUBIC)

    new_landmarks = np.concatenate((landmarks, np.ones((68, 1))), axis=1)
    new_landmarks = new_landmarks.dot(M.transpose())

    bbox_scaled = scale_bbox(bbox, scale, square)

    output, new_landmarks = crop_img(output, new_landmarks, bbox_scaled)

    return output, new_landmarks
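The landmark update in align_crop applies the 2x3 rotation matrix in homogeneous coordinates. A self-contained check with synthetic points (not the 68-point data) confirms it matches OpenCV's own cv2.transform:

import cv2
import numpy as np

pts = np.random.rand(68, 2) * 100                      # synthetic "landmarks"
M = cv2.getRotationMatrix2D((50.0, 50.0), 30.0, 1.0)   # 2x3 affine matrix

# Manual homogeneous form, as in align_crop above
manual = np.concatenate((pts, np.ones((68, 1))), axis=1).dot(M.T)

# OpenCV's equivalent
reference = cv2.transform(pts.reshape(-1, 1, 2), M).reshape(-1, 2)

assert np.allclose(manual, reference)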
Example #8
    def run(self):
        """ Main processing loop. Intended to be executed on a separate process. """
        while self._running:
            task = self._input_queue.get()

            # Initialize new video rendering task
            if self._in_vid is None:
                self._in_vid_path, self._seq, out_vid_path = task[:3]
                additional_attributes = task[3]
                self._frame_count = 0

                # Add additional arguments as members
                for attr_name, attr_val in additional_attributes.items():
                    setattr(self, attr_name, attr_val)

                # Open input video
                self._in_vid = cv2.VideoCapture(self._in_vid_path)
                assert self._in_vid.isOpened(), f'Failed to open video: "{self._in_vid_path}"'

                in_total_frames = int(self._in_vid.get(cv2.CAP_PROP_FRAME_COUNT))
                fps = self._in_vid.get(cv2.CAP_PROP_FPS)
                in_vid_width = int(self._in_vid.get(cv2.CAP_PROP_FRAME_WIDTH))
                in_vid_height = int(self._in_vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
                self._total_frames = in_total_frames if self._verbose == 0 else len(self._seq)
                # print(f'Debug: initializing video: "{self._in_vid_path}", total_frames={self._total_frames}')

                # Initialize output video
                if out_vid_path is not None:
                    out_size = (in_vid_width, in_vid_height)
                    if self._verbose <= 0 and self._output_crop:
                        out_size = (self._resolution, self._resolution)
                    elif self._verbose_size is not None:
                        out_size = self._verbose_size
                    self._out_vid = cv2.VideoWriter(out_vid_path, self._fourcc, fps, out_size)

                # Write frames as they are until the start of the sequence
                if self._verbose == 0:
                    for i in range(self._seq.start_index):
                        # Read frame
                        ret, frame_bgr = self._in_vid.read()
                        assert frame_bgr is not None, f'Failed to read frame {i} from input video: "{self._in_vid_path}"'
                        self._render(frame_bgr)
                        self._frame_count += 1

                continue

            # Write a batch of frames
            tensors = task
            batch_size = tensors[0].shape[0]

            # For each frame in the current batch of tensors
            for b in range(batch_size):
                # Handle full frames if output_crop was not specified
                full_frame_bgr, bbox = None, None
                if self._verbose == 0 and not self._output_crop:
                    # Read frame from input video
                    ret, full_frame_bgr = self._in_vid.read()
                    assert full_frame_bgr is not None, \
                        f'Failed to read frame {self._frame_count} from input video: "{self._in_vid_path}"'

                    # Get bounding box from sequence
                    det = self._seq[self._frame_count - self._seq.start_index]
                    bbox = np.concatenate((det[:2], det[2:] - det[:2]))
                    bbox = scale_bbox(bbox, self._crop_scale)

                render_bgr = self.on_render(*[t[b] for t in tensors])
                self._render(render_bgr, full_frame_bgr, bbox)
                self._frame_count += 1
                # print(f'Debug: Writing frame: {self._frame_count}')

            # Check if we reached the end of the sequence
            if self._verbose == 0 and self._frame_count >= (self._seq.start_index + len(self._seq)):
                for i in range(self._seq.start_index + len(self._seq), self._total_frames):
                    # Read frame
                    ret, frame_bgr = self._in_vid.read()
                    assert frame_bgr is not None, f'Failed to read frame {i} from input video: "{self._in_vid_path}"'
                    self._render(frame_bgr)
                    self._frame_count += 1

            # Check if all frames have been processed
            if self._frame_count >= self._total_frames:
                # Clean up
                self._in_vid.release()
                if self._out_vid is not None:
                    self._out_vid.release()
                self._in_vid = None
                self._out_vid = None
                self._seq = None
                self._in_vid_path = None
                self._total_frames = None
                self._frame_count = 0

                # Notify job is finished
                self._reply_queue.put(True)
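The worker distinguishes task types by state alone: the first item pulled after a video finishes is an initialization tuple, and every later item is a batch of tensors. A hedged sketch of the producer side (the function name and literal paths here are assumptions, not taken from the source):

def render_sequence(worker, seq, batches):
    """Drive the worker's queue protocol (sketch; names are assumptions)."""
    # 1) Initialization task: (input path, sequence, output path, extra attributes)
    worker._input_queue.put(('input.mp4', seq, 'output.mp4', {'_crop_scale': 1.2}))
    # 2) One task per batch: a tuple of tensors whose first dimension is the batch
    for batch in batches:
        worker._input_queue.put(batch)
    # 3) Block until the worker signals that all frames were written
    assert worker._reply_queue.get() is True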
Example #9
def extract_landmarks_bboxes_euler_from_images(img_dir,
                                               face_pose,
                                               face_align=None,
                                               img_size=(224, 224),
                                               scale=1.2,
                                               device=None,
                                               cache_file=None):

    if face_align is None:
        face_align = face_alignment.FaceAlignment(
            face_alignment.LandmarksType._2D, flip_input=True)

    cache_file = img_dir + '.pkl' if cache_file is None else cache_file
    if not os.path.exists(cache_file):
        frame_indices = []
        landmarks = []
        bboxes = []
        eulers = []

        img_paths = glob(os.path.join(img_dir, '*.jpg'))

        for i, img_path in tqdm(enumerate(img_paths),
                                unit='images',
                                total=len(img_paths)):
            img_bgr = cv2.imread(img_path)
            if img_bgr is None:
                continue
            img_rgb = img_bgr[:, :, ::-1]
            detected_faces = face_align.face_detector.detect_from_image(
                img_bgr.copy())

            if len(detected_faces) == 0:
                continue
            curr_bbox = get_main_bbox(
                np.array(detected_faces)[:, :4], img_bgr.shape[:2])
            detected_faces = [curr_bbox]

            preds = face_align.get_landmarks(img_rgb, detected_faces)
            curr_landmarks = preds[0]

            curr_bbox[2:] = curr_bbox[2:] - curr_bbox[:2] + 1

            scaled_bbox = scale_bbox(curr_bbox, scale)
            cropped_frame_rgb, cropped_landmarks = crop_img(
                img_rgb, curr_landmarks, scaled_bbox)
            scaled_frame_rgb = np.array(
                F.resize(Image.fromarray(cropped_frame_rgb), img_size,
                         Image.BICUBIC))
            scaled_frame_tensor = rgb2tensor(
                scaled_frame_rgb.copy()).to(device)
            curr_euler = face_pose(scaled_frame_tensor)
            curr_euler = np.array([x.cpu().numpy() for x in curr_euler])

            frame_indices.append(i)
            landmarks.append(curr_landmarks)
            bboxes.append(curr_bbox)
            eulers.append(curr_euler)

        frame_indices = np.array(frame_indices)
        landmarks = np.array(landmarks)
        bboxes = np.array(bboxes)
        eulers = np.array(eulers)

        with open(cache_file, "wb") as fp:
            pickle.dump(frame_indices, fp)
            pickle.dump(landmarks, fp)
            pickle.dump(bboxes, fp)
            pickle.dump(eulers, fp)
    else:
        with open(cache_file, "rb") as fp:
            frame_indices = pickle.load(fp)
            landmarks = pickle.load(fp)
            bboxes = pickle.load(fp)
            eulers = pickle.load(fp)

    return frame_indices, landmarks, bboxes, eulers
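Note the cache layout: four separate pickle.dump calls into one file, read back by four pickle.load calls in the same order. Pickle streams are self-delimiting, so this round-trips cleanly; a minimal standalone demonstration:

import pickle
import tempfile

with tempfile.NamedTemporaryFile(suffix='.pkl', delete=False) as fp:
    pickle.dump([1, 2, 3], fp)   # first object in the stream
    pickle.dump('second', fp)    # appended right after it
    path = fp.name

with open(path, 'rb') as fp:
    assert pickle.load(fp) == [1, 2, 3]  # each load stops at its object's end
    assert pickle.load(fp) == 'second'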
Example #10
def main(input_path,
         output_dir=None,
         cache_path=None,
         seq_postfix='_dsfd_seq.pkl',
         resolution=256,
         crop_scale=2.0,
         select='all',
         disable_tqdm=False):
    cache_path = os.path.splitext(
        input_path)[0] + seq_postfix if cache_path is None else cache_path
    if output_dir is None:
        output_dir = os.path.splitext(input_path)[0]
        if not os.path.isdir(output_dir):
            os.mkdir(output_dir)

    # Verification
    if not os.path.isfile(input_path):
        raise RuntimeError('Input video does not exist: ' + input_path)
    if not os.path.isfile(cache_path):
        raise RuntimeError('Cache file does not exist: ' + cache_path)
    if not os.path.isdir(output_dir):
        raise RuntimeError('Output directory does not exist: ' + output_dir)

    print('=> Cropping video sequences from video: "%s"...' %
          os.path.basename(input_path))

    # Load sequences from file
    with open(cache_path, "rb") as fp:  # Unpickling
        seq_list = pickle.load(fp)

    # Select sequences
    if select == 'longest':
        selected_seq_index = np.argmax([len(s) for s in seq_list])
        seq = seq_list[selected_seq_index]
        seq.id = 0
        seq_list = [seq]

    # Open input video file
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        raise RuntimeError('Failed to read video: ' + input_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    input_vid_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    input_vid_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # For each sequence initialize output video file
    out_vids = []
    fourcc = cv2.VideoWriter_fourcc(*'avc1')
    for seq in seq_list:
        curr_vid_name = os.path.splitext(
            os.path.basename(input_path))[0] + '_seq%02d.mp4' % seq.id
        curr_vid_path = os.path.join(output_dir, curr_vid_name)
        out_vids.append(
            cv2.VideoWriter(curr_vid_path, fourcc, fps,
                            (resolution, resolution)))

    # For each frame in the target video
    cropped_detections = [[] for _ in seq_list]
    cropped_landmarks = [[] for _ in seq_list]
    pbar = range(total_frames) if disable_tqdm else tqdm(range(total_frames))
    for i in pbar:
        ret, frame = cap.read()
        if frame is None:
            continue

        # For each sequence
        for s, seq in enumerate(seq_list):
            if i < seq.start_index or (seq.start_index + len(seq) - 1) < i:
                continue
            det = seq[i - seq.start_index]

            # Crop frame
            bbox = np.concatenate((det[:2], det[2:] - det[:2]))
            bbox = scale_bbox(bbox, crop_scale)
            frame_cropped = crop_img(frame, bbox)
            frame_cropped = cv2.resize(frame_cropped, (resolution, resolution),
                                       interpolation=cv2.INTER_CUBIC)

            # Write cropped frame to output video
            out_vids[s].write(frame_cropped)

            # Add cropped detection to list
            orig_size = bbox[2:]
            axes_scale = np.array([resolution, resolution]) / orig_size
            det[:2] -= bbox[:2]
            det[2:] -= bbox[:2]
            det[:2] *= axes_scale
            det[2:] *= axes_scale
            cropped_detections[s].append(det)

            # Add cropped landmarks to list
            if hasattr(seq, 'landmarks'):
                curr_landmarks = seq.landmarks[i - seq.start_index]
                curr_landmarks[:, :2] -= bbox[:2]

                # 3D landmarks case
                if curr_landmarks.shape[1] == 3:
                    axes_scale = np.append(axes_scale, axes_scale.mean())

                curr_landmarks *= axes_scale
                cropped_landmarks[s].append(curr_landmarks)

    # For each sequence write cropped sequence to file
    for s, seq in enumerate(seq_list):
        # seq.detections = np.array(cropped_detections[s])
        # if hasattr(seq, 'landmarks'):
        #     seq.landmarks = np.array(cropped_landmarks[s])
        # seq.start_index = 0

        # TODO: this is a hack to change class type (remove this later)
        out_seq = Sequence(0)
        out_seq.detections = np.array(cropped_detections[s])
        if hasattr(seq, 'landmarks'):
            out_seq.landmarks = np.array(cropped_landmarks[s])
        out_seq.id, out_seq.obj_id, out_seq.size_avg = seq.id, seq.obj_id, seq.size_avg

        # Write to file
        curr_out_name = os.path.splitext(os.path.basename(
            input_path))[0] + '_seq%02d%s' % (out_seq.id, seq_postfix)
        curr_out_path = os.path.join(output_dir, curr_out_name)
        with open(curr_out_path, "wb") as fp:  # Pickling
            pickle.dump([out_seq], fp)
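The coordinate remapping near the end of the frame loop (subtract the crop origin, then rescale to the output resolution) is easy to sanity-check with hypothetical numbers:

import numpy as np

resolution = 256
det = np.array([120.0, 80.0, 220.0, 200.0])    # x1, y1, x2, y2 (hypothetical)
bbox = np.array([100.0, 60.0, 160.0, 160.0])   # crop region: x, y, w, h

axes_scale = np.array([resolution, resolution]) / bbox[2:]  # [1.6, 1.6]
det[:2] = (det[:2] - bbox[:2]) * axes_scale    # top-left     -> [32., 32.]
det[2:] = (det[2:] - bbox[:2]) * axes_scale    # bottom-right -> [192., 224.]
print(det)  # the detection expressed in 256x256 crop coordinates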
Example #11
def main(input_path,
         output_path=None,
         seq_postfix='_dsfd_seq.pkl',
         output_postfix='_dsfd_seq_lms_euler.pkl',
         pose_model_path='weights/hopenet_robust_alpha1.pkl',
         smooth_det=False,
         smooth_euler=False,
         gpus=None,
         cpu_only=False,
         batch_size=16):
    cache_path = os.path.splitext(input_path)[0] + seq_postfix
    output_path = os.path.splitext(
        input_path)[0] + output_postfix if output_path is None else output_path

    # Initialize device
    torch.set_grad_enabled(False)
    device, gpus = set_device(gpus, not cpu_only)

    # Load sequences from file
    with open(cache_path, "rb") as fp:  # Unpickling
        seq_list = pickle.load(fp)

    # Load pose model
    face_pose = Hopenet().to(device)
    checkpoint = torch.load(pose_model_path)
    face_pose.load_state_dict(checkpoint)
    face_pose.train(False)

    # Open input video file
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        raise RuntimeError('Failed to read video: ' + input_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    input_vid_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    input_vid_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Smooth sequence bounding boxes
    if smooth_det:
        for seq in seq_list:
            seq.smooth()

    # For each sequence
    total_detections = sum([len(s) for s in seq_list])
    pbar = tqdm(range(total_detections), unit='detections')
    for seq in seq_list:
        euler = []
        frame_cropped_tensor_list = []
        cap.set(cv2.CAP_PROP_POS_FRAMES, seq.start_index)

        # For each detection bounding box in the current sequence
        for i, det in enumerate(seq.detections):
            ret, frame_bgr = cap.read()
            if frame_bgr is None:
                raise RuntimeError('Failed to read frame from video!')
            frame_rgb = frame_bgr[:, :, ::-1]

            # Crop frame
            bbox = np.concatenate((det[:2], det[2:] - det[:2]))
            bbox = scale_bbox(bbox, 1.2)
            frame_cropped_rgb = crop_img(frame_rgb, bbox)
            frame_cropped_rgb = cv2.resize(frame_cropped_rgb, (224, 224),
                                           interpolation=cv2.INTER_CUBIC)
            frame_cropped_tensor = rgb2tensor(frame_cropped_rgb).to(device)

            # Gather batches
            frame_cropped_tensor_list.append(frame_cropped_tensor)
            if len(frame_cropped_tensor_list) < batch_size and (i + 1) < len(seq):
                continue
            frame_cropped_tensor_batch = torch.cat(frame_cropped_tensor_list,
                                                   dim=0)

            # Calculate euler angles
            curr_euler_batch = face_pose(
                frame_cropped_tensor_batch)  # Yaw, Pitch, Roll
            curr_euler_batch = curr_euler_batch.cpu().numpy()

            # For each prediction in the batch
            for b, curr_euler in enumerate(curr_euler_batch):
                # Add euler to list
                euler.append(curr_euler)

                # Render
                # render_img = tensor2bgr(frame_cropped_tensor_batch[b]).copy()
                # cv2.putText(render_img, '(%.2f, %.2f, %.2f)' % (curr_euler[0], curr_euler[1], curr_euler[2]), (15, 15),
                #             cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                # cv2.imshow('render_img', render_img)
                # if cv2.waitKey(0) & 0xFF == ord('q'):
                #     break

            # Clear lists
            frame_cropped_tensor_list.clear()

            pbar.update(len(frame_cropped_tensor_batch))

        # Add landmarks to sequence and optionally smooth them
        euler = np.array(euler)
        if smooth_euler:
            euler = smooth(euler)
        seq.euler = euler

    # Write final sequence list to file
    with open(output_path, "wb") as fp:  # Pickling
        pickle.dump(seq_list, fp)
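The batching idiom inside the detection loop (accumulate tensors, flush once the batch is full or the sequence ends) generalizes beyond pose estimation. A stripped-down sketch of the same control flow, assuming a model that maps a batched tensor to a batched tensor:

import torch

def run_batched(tensor_iter, model, batch_size=16):
    """Apply model to 1-item tensors in batches, flushing the remainder (sketch)."""
    items = list(tensor_iter)
    pending, outputs = [], []
    for i, t in enumerate(items):
        pending.append(t)
        # Flush on a full batch, or on the final (possibly short) one
        if len(pending) < batch_size and (i + 1) < len(items):
            continue
        outputs.append(model(torch.cat(pending, dim=0)))
        pending.clear()
    return torch.cat(outputs, dim=0)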