def process(self, img_list, bbox_list, landmarks_list=None):
        """Crop each image (and optionally its landmarks) around its bounding box, in place.

        Entries of ``img_list`` that are themselves lists or tuples are handled
        recursively together with the matching entries of ``bbox_list`` and
        ``landmarks_list``.

        Args:
            img_list (list): Images, or nested lists/tuples of images. Each entry is
                overwritten with its cropped result.
            bbox_list (list): Bounding boxes parallel to ``img_list``. When
                ``self.det_format`` is truthy, the first two values of a box are
                subtracted from the remaining ones before scaling.
            landmarks_list (list, optional): Landmarks parallel to ``img_list``.
                When given, each entry is overwritten with the landmarks returned
                by ``crop_img``.

        Returns:
            tuple: ``(img_list, landmarks_list)``, the same (mutated) objects that
            were passed in; ``landmarks_list`` stays ``None`` if it was omitted.
        """
        with_landmarks = landmarks_list is not None

        for idx, entry in enumerate(img_list):
            raw_box = bbox_list[idx]

            # Nested container: recurse on the sub-lists and move on
            if isinstance(entry, (list, tuple)):
                if with_landmarks:
                    img_list[idx], landmarks_list[idx] = self.process(entry, raw_box, landmarks_list[idx])
                else:
                    img_list[idx], _ = self.process(entry, raw_box)
                continue

            # Single image: bring the box to the layout expected by scale_bbox
            if self.det_format:
                box = np.concatenate((raw_box[:2], raw_box[2:] - raw_box[:2]))
            else:
                box = raw_box
            box_scaled = scale_bbox(box, self.bbox_scale, self.bbox_square)

            if with_landmarks:
                img_list[idx], landmarks_list[idx] = crop_img(entry, box_scaled, landmarks_list[idx], self.border,
                                                              self.value)
            else:
                img_list[idx] = crop_img(entry, box_scaled, border=self.border, value=self.value)

        return img_list, landmarks_list
Ejemplo n.º 2
0
    def __call__(self, img, landmarks, bbox):
        """Crop (and optionally align) an image together with its landmarks.

        Args:
            img: Input image; it is converted to a fresh NumPy array before processing.
            landmarks: Landmarks belonging to ``img``.
            bbox: Bounding box used for cropping.

        Returns:
            tuple: ``(img, landmarks, bbox)`` where ``img`` is the cropped result as
            a PIL image, ``landmarks`` are the landmarks returned by the crop
            routine and ``bbox`` is the unmodified input box.
        """
        pixels = np.array(img).copy()

        if not self.align:
            # Plain crop around the scaled bounding box
            scaled_box = scale_bbox(bbox, self.bbox_scale, self.bbox_square)
            pixels, landmarks = crop_img(pixels, landmarks, scaled_box)
        else:
            # align_crop scales the bounding box itself
            pixels, landmarks = align_crop(pixels, landmarks, bbox,
                                           self.bbox_scale, self.bbox_square)

        return Image.fromarray(pixels), landmarks, bbox
def main(input_path,
         output_dir=None,
         cache_path=None,
         seq_postfix='_dsfd_seq.pkl',
         out_postfix='.jpg',
         resolution=256,
         crop_scale=1.2):
    """Crop the first detection of every cached sequence out of a single image.

    Args:
        input_path (str): Path to the input image.
        output_dir (str, optional): Directory receiving the cropped images. Defaults
            to the input path without its extension; that directory is created
            when missing. An explicitly given directory must already exist.
        cache_path (str, optional): Path to the pickled sequence list. Defaults to
            the input path without its extension plus ``seq_postfix``.
        seq_postfix (str): Postfix used to derive the default cache path.
        out_postfix (str): Postfix (including extension) of the output image names.
        resolution (int): Width and height of the written crops.
        crop_scale (float): Scale passed to ``scale_bbox`` for every bounding box.

    Raises:
        RuntimeError: If the input image, the cache file or the output directory
            is missing, if the image cannot be decoded, or if a crop cannot be
            written.
    """
    input_stem = os.path.splitext(input_path)[0]
    if cache_path is None:
        cache_path = input_stem + seq_postfix

    # Validate the inputs first so a bad path does not leave an empty output
    # directory behind.
    if not os.path.isfile(input_path):
        raise RuntimeError('Input image does not exist: ' + input_path)
    if not os.path.isfile(cache_path):
        raise RuntimeError('Cache file does not exist: ' + cache_path)

    if output_dir is None:
        output_dir = input_stem
        if not os.path.isdir(output_dir):
            os.mkdir(output_dir)
    if not os.path.isdir(output_dir):
        raise RuntimeError('Output directory does not exist: ' + output_dir)

    print('=> Cropping image sequences from image: "%s"...' %
          os.path.basename(input_path))

    # Load sequences from file.
    # NOTE(security): unpickling executes arbitrary code; only use trusted caches.
    with open(cache_path, "rb") as fp:
        seq_list = pickle.load(fp)

    # Read image from file
    img = cv2.imread(input_path)
    if img is None:
        raise RuntimeError('Failed to read image: ' + input_path)

    img_name = os.path.splitext(os.path.basename(input_path))[0]

    # For each sequence
    for seq in seq_list:
        det = seq[0]

        # Convert the detection by subtracting its first two values from the
        # rest, then scale, crop and resize.
        bbox = np.concatenate((det[:2], det[2:] - det[:2]))
        bbox = scale_bbox(bbox, crop_scale)
        img_cropped = crop_img(img, bbox)
        img_cropped = cv2.resize(img_cropped, (resolution, resolution),
                                 interpolation=cv2.INTER_CUBIC)

        # Write cropped image to file; imwrite reports failure via its return value
        out_img_name = img_name + '_seq%02d%s' % (seq.id, out_postfix)
        out_img_path = os.path.join(output_dir, out_img_name)
        if not cv2.imwrite(out_img_path, img_cropped):
            raise RuntimeError('Failed to write image: ' + out_img_path)
Ejemplo n.º 4
0
    def __call__(self, x):
        """Crop images that come paired with a bounding box; pass everything else through.

        Args:
            x (numpy.ndarray or list of numpy.ndarray): Image (H x W x C) or pose (3) or bounding box (4),
                or an arbitrarily nested list/tuple of those

        Returns:
            numpy.ndarray or list of numpy.ndarray: The cropped image for an
            (image, bounding box) pair, a list of recursively processed items for
            any other list/tuple, and the input itself otherwise
        """
        # Anything that is not a container is returned untouched
        if not isinstance(x, (list, tuple)):
            return x

        is_img_bbox_pair = len(x) == 2 and is_img(x[0]) and is_bbox(x[1])
        if not is_img_bbox_pair:
            # Generic container: process every element recursively
            return [self.__call__(item) for item in x]

        image, box = x
        if self.det_format:
            # Subtract the first two box values from the remaining ones
            box = np.concatenate((box[:2], box[2:] - box[:2]))
        box_scaled = scale_bbox(box, self.bbox_scale, self.bbox_square)
        return crop_img(image, box_scaled, border=self.border_id, value=self.value)
Ejemplo n.º 5
0
def align_crop(img, landmarks, bbox, scale=2.0, square=True):
    """Rotate an image about the midpoint between the eyes, then crop it.

    The eye centers are the means of landmark rows 36-41 and 42-47
    (presumably the 68-point facial landmark layout; confirm against the
    landmark detector in use).

    Args:
        img (numpy.ndarray): Input image.
        landmarks (numpy.ndarray): Landmark array indexed as ``[point, coordinate]``;
            the first two coordinates are used as x and y.
        bbox: Bounding box forwarded to ``scale_bbox``.
        scale (float): Scale forwarded to ``scale_bbox``.
        square (bool): Square flag forwarded to ``scale_bbox``.

    Returns:
        tuple: ``(output, new_landmarks)`` as returned by ``crop_img`` for the
        rotated image and the rotated landmarks.
    """
    right_eye_center = landmarks[36:42, :].mean(axis=0)
    left_eye_center = landmarks[42:48, :].mean(axis=0)

    eye_center = (right_eye_center + left_eye_center) / 2.0
    dy = right_eye_center[1] - left_eye_center[1]
    dx = right_eye_center[0] - left_eye_center[0]
    angle = np.degrees(np.arctan2(dy, dx)) - 180

    # OpenCV's Python bindings can reject NumPy scalars that are not Python
    # floats (e.g. float32 landmarks), so pass plain floats explicitly.
    center = (float(eye_center[0]), float(eye_center[1]))
    M = cv2.getRotationMatrix2D(center, float(angle), 1.)
    output = cv2.warpAffine(img,
                            M, (img.shape[1], img.shape[0]),
                            flags=cv2.INTER_CUBIC)

    # Apply the same affine transform to the landmarks via homogeneous
    # coordinates; size the ones-column from the input instead of assuming 68.
    ones = np.ones((landmarks.shape[0], 1))
    new_landmarks = np.concatenate((landmarks, ones), axis=1)
    new_landmarks = new_landmarks.dot(M.transpose())

    bbox_scaled = scale_bbox(bbox, scale, square)

    output, new_landmarks = crop_img(output, new_landmarks, bbox_scaled)

    return output, new_landmarks
Ejemplo n.º 6
0
def extract_landmarks_bboxes_euler_from_images(img_dir,
                                               face_pose,
                                               face_align=None,
                                               img_size=(224, 224),
                                               scale=1.2,
                                               device=None,
                                               cache_file=None):
    """Extract landmarks, bounding boxes and euler angles for the images in a directory.

    Results are cached: when ``cache_file`` exists its contents are returned
    without touching the images or building any model.

    Args:
        img_dir (str): Directory that is searched for ``*.jpg`` files.
        face_pose (callable): Pose model applied to the cropped, resized face tensor.
        face_align (optional): Object providing ``face_detector.detect_from_image``
            and ``get_landmarks``. A default ``face_alignment.FaceAlignment`` is
            created on a cache miss when omitted.
        img_size (tuple): Size the cropped face is resized to before pose estimation.
        scale (float): Scale passed to ``scale_bbox`` for the pose crop.
        device (optional): Device the face tensor is moved to.
        cache_file (str, optional): Cache path; defaults to ``img_dir + '.pkl'``.

    Returns:
        tuple: ``(frame_indices, landmarks, bboxes, eulers)``. On a cache miss
        these are NumPy arrays with one entry per image in which a face was
        found; ``frame_indices`` holds the position of each such image in the
        sorted list of ``*.jpg`` paths. On a cache hit they are whatever the
        cache file contains.
    """
    cache_file = img_dir + '.pkl' if cache_file is None else cache_file

    # Cache hit: return early, before constructing the alignment model.
    if os.path.exists(cache_file):
        # NOTE(security): unpickling executes arbitrary code; only use trusted caches.
        with open(cache_file, "rb") as fp:
            frame_indices = pickle.load(fp)
            landmarks = pickle.load(fp)
            bboxes = pickle.load(fp)
            eulers = pickle.load(fp)
        return frame_indices, landmarks, bboxes, eulers

    if face_align is None:
        face_align = face_alignment.FaceAlignment(
            face_alignment.LandmarksType._2D, flip_input=True)

    frame_indices = []
    landmarks = []
    bboxes = []
    eulers = []

    # glob returns paths in arbitrary order; sort them so the stored indices
    # are reproducible across runs and platforms.
    img_paths = sorted(glob(os.path.join(img_dir, '*.jpg')))

    for i, img_path in tqdm(enumerate(img_paths),
                            unit='images',
                            total=len(img_paths)):
        img_bgr = cv2.imread(img_path)
        if img_bgr is None:
            continue
        img_rgb = img_bgr[:, :, ::-1]
        detected_faces = face_align.face_detector.detect_from_image(
            img_bgr.copy())

        if len(detected_faces) == 0:
            continue
        curr_bbox = get_main_bbox(
            np.array(detected_faces)[:, :4], img_bgr.shape[:2])
        detected_faces = [curr_bbox]

        preds = face_align.get_landmarks(img_rgb, detected_faces)
        if preds is None or len(preds) == 0:
            # No landmarks could be estimated for this image; skip it
            continue
        curr_landmarks = preds[0]

        # Replace the last two box values by their extent relative to the first
        # two (inclusive, hence the +1). Done after get_landmarks because
        # curr_bbox is the very object that was handed to it.
        curr_bbox[2:] = curr_bbox[2:] - curr_bbox[:2] + 1

        scaled_bbox = scale_bbox(curr_bbox, scale)
        cropped_frame_rgb, cropped_landmarks = crop_img(
            img_rgb, curr_landmarks, scaled_bbox)
        scaled_frame_rgb = np.array(
            F.resize(Image.fromarray(cropped_frame_rgb), img_size,
                     Image.BICUBIC))
        scaled_frame_tensor = rgb2tensor(
            scaled_frame_rgb.copy()).to(device)
        curr_euler = face_pose(scaled_frame_tensor)
        curr_euler = np.array([x.cpu().numpy() for x in curr_euler])

        frame_indices.append(i)
        landmarks.append(curr_landmarks)
        bboxes.append(curr_bbox)
        eulers.append(curr_euler)

    frame_indices = np.array(frame_indices)
    landmarks = np.array(landmarks)
    bboxes = np.array(bboxes)
    eulers = np.array(eulers)

    # The four arrays are stored as consecutive pickles in a single file
    with open(cache_file, "wb") as fp:
        pickle.dump(frame_indices, fp)
        pickle.dump(landmarks, fp)
        pickle.dump(bboxes, fp)
        pickle.dump(eulers, fp)

    return frame_indices, landmarks, bboxes, eulers
Ejemplo n.º 7
0
def main(input_path,
         output_dir=None,
         cache_path=None,
         seq_postfix='_dsfd_seq.pkl',
         resolution=256,
         crop_scale=2.0,
         select='all',
         disable_tqdm=False):
    """Crop every cached detection sequence of a video into its own square video.

    For each sequence a ``<video>_seqNN.mp4`` file and a matching
    ``<video>_seqNN<seq_postfix>`` pickle (detections and, if present,
    landmarks expressed in the cropped frame) are written to ``output_dir``.

    Args:
        input_path (str): Path to the input video.
        output_dir (str, optional): Output directory. Defaults to the input path
            without its extension; that directory is created when missing. An
            explicitly given directory must already exist.
        cache_path (str, optional): Path to the pickled sequence list. Defaults to
            the input path without its extension plus ``seq_postfix``.
        seq_postfix (str): Postfix of the sequence cache files.
        resolution (int): Width and height of the output videos.
        crop_scale (float): Scale passed to ``scale_bbox`` for every bounding box.
        select (str): ``'longest'`` keeps only the longest sequence (its id is
            reset to 0); any other value keeps all sequences.
        disable_tqdm (bool): Disable the progress bar.

    Raises:
        RuntimeError: If the input video, the cache file or the output directory
            is missing, or if the video cannot be opened.
    """
    input_stem = os.path.splitext(input_path)[0]
    cache_path = input_stem + seq_postfix if cache_path is None else cache_path

    # Validate the inputs first so a bad path does not leave an empty output
    # directory behind.
    if not os.path.isfile(input_path):
        raise RuntimeError('Input video does not exist: ' + input_path)
    if not os.path.isfile(cache_path):
        raise RuntimeError('Cache file does not exist: ' + cache_path)

    if output_dir is None:
        output_dir = input_stem
        if not os.path.isdir(output_dir):
            os.mkdir(output_dir)
    if not os.path.isdir(output_dir):
        raise RuntimeError('Output directory does not exist: ' + output_dir)

    print('=> Cropping video sequences from video: "%s"...' %
          os.path.basename(input_path))

    # Load sequences from file.
    # NOTE(security): unpickling executes arbitrary code; only use trusted caches.
    with open(cache_path, "rb") as fp:
        seq_list = pickle.load(fp)

    # Select sequences (np.argmax raises on an empty list, so guard against it)
    if select == 'longest' and len(seq_list) > 0:
        selected_seq_index = np.argmax([len(s) for s in seq_list])
        seq = seq_list[selected_seq_index]
        seq.id = 0
        seq_list = [seq]

    vid_name = os.path.splitext(os.path.basename(input_path))[0]
    cropped_detections = [[] for _ in seq_list]
    cropped_landmarks = [[] for _ in seq_list]

    # Open input video file; capture and writers are released in the finally
    # block so they are closed even when processing fails.
    cap = cv2.VideoCapture(input_path)
    out_vids = []
    try:
        if not cap.isOpened():
            raise RuntimeError('Failed to read video: ' + input_path)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)

        # For each sequence initialize output video file
        fourcc = cv2.VideoWriter_fourcc(*'avc1')
        for seq in seq_list:
            curr_vid_name = vid_name + '_seq%02d.mp4' % seq.id
            curr_vid_path = os.path.join(output_dir, curr_vid_name)
            out_vids.append(
                cv2.VideoWriter(curr_vid_path, fourcc, fps,
                                (resolution, resolution)))

        # For each frame in the target video
        pbar = range(total_frames) if disable_tqdm else tqdm(range(total_frames))
        for i in pbar:
            _, frame = cap.read()
            if frame is None:
                continue

            # For each sequence
            for s, seq in enumerate(seq_list):
                # Skip sequences that do not cover the current frame
                if i < seq.start_index or (seq.start_index + len(seq) - 1) < i:
                    continue
                det = seq[i - seq.start_index]

                # Crop frame
                bbox = np.concatenate((det[:2], det[2:] - det[:2]))
                bbox = scale_bbox(bbox, crop_scale)
                frame_cropped = crop_img(frame, bbox)
                frame_cropped = cv2.resize(frame_cropped,
                                           (resolution, resolution),
                                           interpolation=cv2.INTER_CUBIC)

                # Write cropped frame to output video
                out_vids[s].write(frame_cropped)

                # Express the detection in the cropped, resized frame.
                # NOTE: det is updated in place.
                orig_size = bbox[2:]
                axes_scale = np.array([resolution, resolution]) / orig_size
                det[:2] -= bbox[:2]
                det[2:] -= bbox[:2]
                det[:2] *= axes_scale
                det[2:] *= axes_scale
                cropped_detections[s].append(det)

                # Add cropped landmarks to list (also updated in place)
                if hasattr(seq, 'landmarks'):
                    curr_landmarks = seq.landmarks[i - seq.start_index]
                    curr_landmarks[:, :2] -= bbox[:2]

                    # 3D landmarks case: scale the third axis by the mean of
                    # the two in-plane scales
                    if curr_landmarks.shape[1] == 3:
                        axes_scale = np.append(axes_scale, axes_scale.mean())

                    curr_landmarks *= axes_scale
                    cropped_landmarks[s].append(curr_landmarks)
    finally:
        cap.release()
        for out_vid in out_vids:
            out_vid.release()

    # For each sequence write cropped sequence to file
    for s, seq in enumerate(seq_list):
        # TODO: this is a hack to change class type (remove this later)
        out_seq = Sequence(0)
        out_seq.detections = np.array(cropped_detections[s])
        if hasattr(seq, 'landmarks'):
            out_seq.landmarks = np.array(cropped_landmarks[s])
        out_seq.id, out_seq.obj_id, out_seq.size_avg = seq.id, seq.obj_id, seq.size_avg

        # Write to file
        curr_out_name = vid_name + '_seq%02d%s' % (out_seq.id, seq_postfix)
        curr_out_path = os.path.join(output_dir, curr_out_name)
        with open(curr_out_path, "wb") as fp:  # Pickling
            pickle.dump([out_seq], fp)
Ejemplo n.º 8
0
def main(input_path,
         output_path=None,
         seq_postfix='_dsfd_seq.pkl',
         output_postfix='_dsfd_seq_lms_euler.pkl',
         pose_model_path='weights/hopenet_robust_alpha1.pkl',
         smooth_det=False,
         smooth_euler=False,
         gpus=None,
         cpu_only=False,
         batch_size=16):
    """Estimate euler angles for every detection of every cached video sequence.

    The sequence list is read from ``<input without extension> + seq_postfix``,
    each sequence receives an ``euler`` attribute holding one pose-model
    prediction per detection, and the list is pickled to ``output_path``.

    Args:
        input_path (str): Path to the input video.
        output_path (str, optional): Output pickle path. Defaults to the input
            path without its extension plus ``output_postfix``.
        seq_postfix (str): Postfix of the input sequence cache file.
        output_postfix (str): Postfix used to derive the default output path.
        pose_model_path (str): Path to the pose model weights.
        smooth_det (bool): Call ``smooth()`` on every sequence before processing.
        smooth_euler (bool): Apply ``smooth`` to the predicted angles of each sequence.
        gpus: GPU selection forwarded to ``set_device``.
        cpu_only (bool): Forwarded (negated) to ``set_device``.
        batch_size (int): Number of cropped frames per pose-model forward pass.

    Raises:
        RuntimeError: If the video cannot be opened or a frame cannot be read.
    """
    cache_path = os.path.splitext(input_path)[0] + seq_postfix
    output_path = os.path.splitext(
        input_path)[0] + output_postfix if output_path is None else output_path

    # Initialize device (gradients are disabled globally: inference only)
    torch.set_grad_enabled(False)
    device, _ = set_device(gpus, not cpu_only)

    # Load sequences from file.
    # NOTE(security): unpickling executes arbitrary code; only use trusted caches.
    with open(cache_path, "rb") as fp:
        seq_list = pickle.load(fp)

    # Load pose model. The checkpoint is mapped to the CPU so weights saved on
    # a GPU also load on CPU-only machines; load_state_dict then copies them
    # into the model that already lives on the target device.
    face_pose = Hopenet().to(device)
    checkpoint = torch.load(pose_model_path, map_location='cpu')
    face_pose.load_state_dict(checkpoint)
    face_pose.train(False)

    # Open input video file; released in the finally block on every exit path
    cap = cv2.VideoCapture(input_path)
    try:
        if not cap.isOpened():
            raise RuntimeError('Failed to read video: ' + input_path)

        # Smooth sequence bounding boxes
        if smooth_det:
            for seq in seq_list:
                seq.smooth()

        # For each sequence
        total_detections = sum(len(s) for s in seq_list)
        pbar = tqdm(range(total_detections), unit='detections')
        for seq in seq_list:
            euler = []
            frame_cropped_tensor_list = []
            cap.set(cv2.CAP_PROP_POS_FRAMES, seq.start_index)

            # For each detection bounding box in the current sequence
            for i, det in enumerate(seq.detections):
                _, frame_bgr = cap.read()
                if frame_bgr is None:
                    raise RuntimeError('Failed to read frame from video!')
                frame_rgb = frame_bgr[:, :, ::-1]

                # Crop frame
                bbox = np.concatenate((det[:2], det[2:] - det[:2]))
                bbox = scale_bbox(bbox, 1.2)
                frame_cropped_rgb = crop_img(frame_rgb, bbox)
                frame_cropped_rgb = cv2.resize(frame_cropped_rgb, (224, 224),
                                               interpolation=cv2.INTER_CUBIC)
                frame_cropped_tensor = rgb2tensor(frame_cropped_rgb).to(device)

                # Gather batches: keep collecting until the batch is full or
                # the sequence has no more detections
                frame_cropped_tensor_list.append(frame_cropped_tensor)
                if (len(frame_cropped_tensor_list) < batch_size
                        and (i + 1) < len(seq)):
                    continue
                frame_cropped_tensor_batch = torch.cat(
                    frame_cropped_tensor_list, dim=0)

                # Calculate euler angles
                curr_euler_batch = face_pose(
                    frame_cropped_tensor_batch)  # Yaw, Pitch, Roll
                curr_euler_batch = curr_euler_batch.cpu().numpy()

                # Add one prediction per frame of the batch
                euler.extend(curr_euler_batch)

                # Clear lists
                frame_cropped_tensor_list.clear()

                pbar.update(len(frame_cropped_tensor_batch))

            # Add euler angles to sequence and optionally smooth them
            euler = np.array(euler)
            if smooth_euler:
                euler = smooth(euler)
            seq.euler = euler
        pbar.close()
    finally:
        cap.release()

    # Write final sequence list to file
    with open(output_path, "wb") as fp:  # Pickling
        pickle.dump(seq_list, fp)