def process(self, img_list, bbox_list, landmarks_list=None):
    # For each input image and corresponding landmarks
    for i in range(len(img_list)):
        if isinstance(img_list[i], (list, tuple)):
            # Nested list: recurse into it
            if landmarks_list is None:
                img_list[i], _ = self.process(img_list[i], bbox_list[i])
            else:
                img_list[i], landmarks_list[i] = self.process(img_list[i], bbox_list[i], landmarks_list[i])
        else:
            # Convert detection format (x1, y1, x2, y2) to bounding box format (x, y, w, h)
            if self.det_format:
                bbox = np.concatenate((bbox_list[i][:2], bbox_list[i][2:] - bbox_list[i][:2]))
            else:
                bbox = bbox_list[i]
            bbox_scaled = scale_bbox(bbox, self.bbox_scale, self.bbox_square)
            if landmarks_list is None:
                img_list[i] = crop_img(img_list[i], bbox_scaled, border=self.border, value=self.value)
            else:
                img_list[i], landmarks_list[i] = crop_img(img_list[i], bbox_scaled, landmarks_list[i],
                                                          self.border, self.value)

    return img_list, landmarks_list
def __call__(self, img, landmarks, bbox):
    img = np.array(img).copy()
    if self.align:
        img, landmarks = align_crop(img, landmarks, bbox, self.bbox_scale, self.bbox_square)
    else:
        bbox_scaled = scale_bbox(bbox, self.bbox_scale, self.bbox_square)
        img, landmarks = crop_img(img, landmarks, bbox_scaled)
    img = Image.fromarray(img)

    return img, landmarks, bbox
def main(input_path, output_dir=None, cache_path=None, seq_postfix='_dsfd_seq.pkl', out_postfix='.jpg',
         resolution=256, crop_scale=1.2):
    cache_path = os.path.splitext(input_path)[0] + seq_postfix if cache_path is None else cache_path
    if output_dir is None:
        output_dir = os.path.splitext(input_path)[0]
        if not os.path.isdir(output_dir):
            os.mkdir(output_dir)

    # Verification
    if not os.path.isfile(input_path):
        raise RuntimeError('Input image does not exist: ' + input_path)
    if not os.path.isfile(cache_path):
        raise RuntimeError('Cache file does not exist: ' + cache_path)
    if not os.path.isdir(output_dir):
        raise RuntimeError('Output directory does not exist: ' + output_dir)

    print('=> Cropping image sequences from image: "%s"...' % os.path.basename(input_path))

    # Load sequences from file
    with open(cache_path, "rb") as fp:    # Unpickling
        seq_list = pickle.load(fp)

    # Read image from file
    img = cv2.imread(input_path)
    if img is None:
        raise RuntimeError('Failed to read image: ' + input_path)

    # For each sequence
    for s, seq in enumerate(seq_list):
        det = seq[0]

        # Crop image
        bbox = np.concatenate((det[:2], det[2:] - det[:2]))
        bbox = scale_bbox(bbox, crop_scale)
        img_cropped = crop_img(img, bbox)
        img_cropped = cv2.resize(img_cropped, (resolution, resolution), interpolation=cv2.INTER_CUBIC)

        # Write cropped image to file
        out_img_name = os.path.splitext(os.path.basename(input_path))[0] + '_seq%02d%s' % (seq.id, out_postfix)
        out_img_path = os.path.join(output_dir, out_img_name)
        cv2.imwrite(out_img_path, img_cropped)
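# Usage sketch (an illustration, not part of the original script): crop all cached
# detection sequences of an image into 256x256 JPEG files. Assumes the sequence cache
# 'image_dsfd_seq.pkl' was produced beforehand by the detection and tracking step.
# main('image.jpg', resolution=256, crop_scale=1.2)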
def __call__(self, x):
    """
    Args:
        x (numpy.ndarray or list of numpy.ndarray): Image (H x W x C) or pose (3) or bounding box (4)

    Returns:
        numpy.ndarray or list of numpy.ndarray: Transformed images or poses
    """
    if isinstance(x, (list, tuple)):
        if len(x) == 2 and is_img(x[0]) and is_bbox(x[1]):
            # Found image and bounding box pair
            img, bbox = x
            # Convert detection format (x1, y1, x2, y2) to bounding box format (x, y, w, h)
            if self.det_format:
                bbox = np.concatenate((bbox[:2], bbox[2:] - bbox[:2]))
            bbox_scaled = scale_bbox(bbox, self.bbox_scale, self.bbox_square)
            return crop_img(img, bbox_scaled, border=self.border_id, value=self.value)
        else:
            # Recurse into nested lists
            return [self.__call__(a) for a in x]

    return x
def align_crop(img, landmarks, bbox, scale=2.0, square=True):
    # Compute the rotation angle from the eye centers (68-point landmarks convention)
    right_eye_center = landmarks[36:42, :].mean(axis=0)
    left_eye_center = landmarks[42:48, :].mean(axis=0)
    eye_center = (right_eye_center + left_eye_center) / 2.0
    dy = right_eye_center[1] - left_eye_center[1]
    dx = right_eye_center[0] - left_eye_center[0]
    angle = np.degrees(np.arctan2(dy, dx)) - 180

    # Rotate the image around the eye center to level the eyes
    M = cv2.getRotationMatrix2D(tuple(eye_center), angle, 1.)
    output = cv2.warpAffine(img, M, (img.shape[1], img.shape[0]), flags=cv2.INTER_CUBIC)

    # Apply the same rotation to the landmarks (homogeneous coordinates)
    new_landmarks = np.concatenate((landmarks, np.ones((68, 1))), axis=1)
    new_landmarks = new_landmarks.dot(M.transpose())

    # Crop the rotated image and landmarks
    bbox_scaled = scale_bbox(bbox, scale, square)
    output, new_landmarks = crop_img(output, new_landmarks, bbox_scaled)

    return output, new_landmarks
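# Minimal usage sketch for align_crop (an illustration, not part of the original code).
# Assumes numpy is imported as np and that scale_bbox and crop_img, which align_crop
# calls, are available in the current module. The image, landmarks, and bbox are synthetic.
demo_img = np.zeros((256, 256, 3), dtype=np.uint8)             # blank test image
demo_landmarks = np.tile(np.array([[128.0, 128.0]]), (68, 1))  # 68 identical points
demo_landmarks[36:42] += [-20.0, -5.0]                         # right eye, slightly higher
demo_landmarks[42:48] += [20.0, 5.0]                           # left eye, slightly lower
demo_bbox = np.array([64, 64, 128, 128])                       # (x, y, w, h) format
aligned_img, aligned_landmarks = align_crop(demo_img, demo_landmarks, demo_bbox, scale=1.2)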
def extract_landmarks_bboxes_euler_from_images(img_dir, face_pose, face_align=None, img_size=(224, 224),
                                               scale=1.2, device=None, cache_file=None):
    if face_align is None:
        face_align = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, flip_input=True)
    cache_file = img_dir + '.pkl' if cache_file is None else cache_file
    if not os.path.exists(cache_file):
        frame_indices = []
        landmarks = []
        bboxes = []
        eulers = []
        img_paths = glob(os.path.join(img_dir, '*.jpg'))

        # For each image in the directory
        for i, img_path in tqdm(enumerate(img_paths), unit='images', total=len(img_paths)):
            img_bgr = cv2.imread(img_path)
            if img_bgr is None:
                continue
            img_rgb = img_bgr[:, :, ::-1]

            # Detect faces and keep only the main bounding box
            detected_faces = face_align.face_detector.detect_from_image(img_bgr.copy())
            if len(detected_faces) == 0:
                continue
            curr_bbox = get_main_bbox(np.array(detected_faces)[:, :4], img_bgr.shape[:2])
            detected_faces = [curr_bbox]

            # Extract landmarks
            preds = face_align.get_landmarks(img_rgb, detected_faces)
            curr_landmarks = preds[0]

            # Convert the bounding box to (x, y, w, h) format, scale it, and crop the face
            curr_bbox[2:] = curr_bbox[2:] - curr_bbox[:2] + 1
            scaled_bbox = scale_bbox(curr_bbox, scale)
            cropped_frame_rgb, cropped_landmarks = crop_img(img_rgb, curr_landmarks, scaled_bbox)
            scaled_frame_rgb = np.array(F.resize(Image.fromarray(cropped_frame_rgb), img_size, Image.BICUBIC))
            scaled_frame_tensor = rgb2tensor(scaled_frame_rgb.copy()).to(device)

            # Estimate Euler angles with the pose model
            curr_euler = face_pose(scaled_frame_tensor)
            curr_euler = np.array([x.cpu().numpy() for x in curr_euler])

            # Add results to lists
            frame_indices.append(i)
            landmarks.append(curr_landmarks)
            bboxes.append(curr_bbox)
            eulers.append(curr_euler)

        frame_indices = np.array(frame_indices)
        landmarks = np.array(landmarks)
        bboxes = np.array(bboxes)
        eulers = np.array(eulers)

        # Save results to cache file
        with open(cache_file, "wb") as fp:
            pickle.dump(frame_indices, fp)
            pickle.dump(landmarks, fp)
            pickle.dump(bboxes, fp)
            pickle.dump(eulers, fp)
    else:
        # Load cached results from file
        with open(cache_file, "rb") as fp:
            frame_indices = pickle.load(fp)
            landmarks = pickle.load(fp)
            bboxes = pickle.load(fp)
            eulers = pickle.load(fp)

    return frame_indices, landmarks, bboxes, eulers
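# Example invocation sketch (an illustration, not part of the original code). The image
# directory 'my_imgs' and the weights path are placeholders; the Hopenet pose estimator
# mirrors the one loaded in the video script below.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
face_pose = Hopenet().to(device)
face_pose.load_state_dict(torch.load('weights/hopenet_robust_alpha1.pkl'))
face_pose.train(False)
frame_indices, landmarks, bboxes, eulers = extract_landmarks_bboxes_euler_from_images(
    'my_imgs', face_pose, img_size=(224, 224), scale=1.2, device=device)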
def main(input_path, output_dir=None, cache_path=None, seq_postfix='_dsfd_seq.pkl', resolution=256,
         crop_scale=2.0, select='all', disable_tqdm=False):
    cache_path = os.path.splitext(input_path)[0] + seq_postfix if cache_path is None else cache_path
    if output_dir is None:
        output_dir = os.path.splitext(input_path)[0]
        if not os.path.isdir(output_dir):
            os.mkdir(output_dir)

    # Verification
    if not os.path.isfile(input_path):
        raise RuntimeError('Input video does not exist: ' + input_path)
    if not os.path.isfile(cache_path):
        raise RuntimeError('Cache file does not exist: ' + cache_path)
    if not os.path.isdir(output_dir):
        raise RuntimeError('Output directory does not exist: ' + output_dir)

    print('=> Cropping video sequences from video: "%s"...' % os.path.basename(input_path))

    # Load sequences from file
    with open(cache_path, "rb") as fp:    # Unpickling
        seq_list = pickle.load(fp)

    # Select sequences
    if select == 'longest':
        selected_seq_index = np.argmax([len(s) for s in seq_list])
        seq = seq_list[selected_seq_index]
        seq.id = 0
        seq_list = [seq]

    # Open input video file
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        raise RuntimeError('Failed to read video: ' + input_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    input_vid_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    input_vid_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # For each sequence initialize output video file
    out_vids = []
    fourcc = cv2.VideoWriter_fourcc(*'avc1')
    for seq in seq_list:
        curr_vid_name = os.path.splitext(os.path.basename(input_path))[0] + '_seq%02d.mp4' % seq.id
        curr_vid_path = os.path.join(output_dir, curr_vid_name)
        out_vids.append(cv2.VideoWriter(curr_vid_path, fourcc, fps, (resolution, resolution)))

    # For each frame in the target video
    cropped_detections = [[] for seq in seq_list]
    cropped_landmarks = [[] for seq in seq_list]
    pbar = range(total_frames) if disable_tqdm else tqdm(range(total_frames))
    for i in pbar:
        ret, frame = cap.read()
        if frame is None:
            continue

        # For each sequence
        for s, seq in enumerate(seq_list):
            if i < seq.start_index or (seq.start_index + len(seq) - 1) < i:
                continue
            det = seq[i - seq.start_index]

            # Crop frame
            bbox = np.concatenate((det[:2], det[2:] - det[:2]))
            bbox = scale_bbox(bbox, crop_scale)
            frame_cropped = crop_img(frame, bbox)
            frame_cropped = cv2.resize(frame_cropped, (resolution, resolution), interpolation=cv2.INTER_CUBIC)

            # Write cropped frame to output video
            out_vids[s].write(frame_cropped)

            # Add cropped detection to list
            orig_size = bbox[2:]
            axes_scale = np.array([resolution, resolution]) / orig_size
            det[:2] -= bbox[:2]
            det[2:] -= bbox[:2]
            det[:2] *= axes_scale
            det[2:] *= axes_scale
            cropped_detections[s].append(det)

            # Add cropped landmarks to list
            if hasattr(seq, 'landmarks'):
                curr_landmarks = seq.landmarks[i - seq.start_index]
                curr_landmarks[:, :2] -= bbox[:2]

                # 3D landmarks case
                if curr_landmarks.shape[1] == 3:
                    axes_scale = np.append(axes_scale, axes_scale.mean())

                curr_landmarks *= axes_scale
                cropped_landmarks[s].append(curr_landmarks)

    # For each sequence write cropped sequence to file
    for s, seq in enumerate(seq_list):
        # seq.detections = np.array(cropped_detections[s])
        # if hasattr(seq, 'landmarks'):
        #     seq.landmarks = np.array(cropped_landmarks[s])
        # seq.start_index = 0

        # TODO: this is a hack to change class type (remove this later)
        out_seq = Sequence(0)
        out_seq.detections = np.array(cropped_detections[s])
        if hasattr(seq, 'landmarks'):
            out_seq.landmarks = np.array(cropped_landmarks[s])
        out_seq.id, out_seq.obj_id, out_seq.size_avg = seq.id, seq.obj_id, seq.size_avg

        # Write to file
        curr_out_name = os.path.splitext(os.path.basename(input_path))[0] + '_seq%02d%s' % (out_seq.id, seq_postfix)
        curr_out_path = os.path.join(output_dir, curr_out_name)
        with open(curr_out_path, "wb") as fp:    # Pickling
            pickle.dump([out_seq], fp)
def main(input_path, output_path=None, seq_postfix='_dsfd_seq.pkl', output_postfix='_dsfd_seq_lms_euler.pkl',
         pose_model_path='weights/hopenet_robust_alpha1.pkl', smooth_det=False, smooth_euler=False,
         gpus=None, cpu_only=False, batch_size=16):
    cache_path = os.path.splitext(input_path)[0] + seq_postfix
    output_path = os.path.splitext(input_path)[0] + output_postfix if output_path is None else output_path

    # Initialize device
    torch.set_grad_enabled(False)
    device, gpus = set_device(gpus, not cpu_only)

    # Load sequences from file
    with open(cache_path, "rb") as fp:    # Unpickling
        seq_list = pickle.load(fp)

    # Load pose model
    face_pose = Hopenet().to(device)
    checkpoint = torch.load(pose_model_path)
    face_pose.load_state_dict(checkpoint)
    face_pose.train(False)

    # Open input video file
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        raise RuntimeError('Failed to read video: ' + input_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    input_vid_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    input_vid_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Smooth sequence bounding boxes
    if smooth_det:
        for seq in seq_list:
            seq.smooth()

    # For each sequence
    total_detections = sum([len(s) for s in seq_list])
    pbar = tqdm(range(total_detections), unit='detections')
    for seq in seq_list:
        euler = []
        frame_cropped_tensor_list = []
        cap.set(cv2.CAP_PROP_POS_FRAMES, seq.start_index)

        # For each detection bounding box in the current sequence
        for i, det in enumerate(seq.detections):
            ret, frame_bgr = cap.read()
            if frame_bgr is None:
                raise RuntimeError('Failed to read frame from video!')
            frame_rgb = frame_bgr[:, :, ::-1]

            # Crop frame
            bbox = np.concatenate((det[:2], det[2:] - det[:2]))
            bbox = scale_bbox(bbox, 1.2)
            frame_cropped_rgb = crop_img(frame_rgb, bbox)
            frame_cropped_rgb = cv2.resize(frame_cropped_rgb, (224, 224), interpolation=cv2.INTER_CUBIC)
            frame_cropped_tensor = rgb2tensor(frame_cropped_rgb).to(device)

            # Gather batches
            frame_cropped_tensor_list.append(frame_cropped_tensor)
            if len(frame_cropped_tensor_list) < batch_size and (i + 1) < len(seq):
                continue
            frame_cropped_tensor_batch = torch.cat(frame_cropped_tensor_list, dim=0)

            # Calculate Euler angles
            curr_euler_batch = face_pose(frame_cropped_tensor_batch)    # Yaw, Pitch, Roll
            curr_euler_batch = curr_euler_batch.cpu().numpy()

            # For each prediction in the batch
            for b, curr_euler in enumerate(curr_euler_batch):
                # Add Euler angles to list
                euler.append(curr_euler)

                # Render
                # render_img = tensor2bgr(frame_cropped_tensor_batch[b]).copy()
                # cv2.putText(render_img, '(%.2f, %.2f, %.2f)' % (curr_euler[0], curr_euler[1], curr_euler[2]),
                #             (15, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                # cv2.imshow('render_img', render_img)
                # if cv2.waitKey(0) & 0xFF == ord('q'):
                #     break

            # Clear lists
            frame_cropped_tensor_list.clear()
            pbar.update(len(frame_cropped_tensor_batch))

        # Add Euler angles to sequence and optionally smooth them
        euler = np.array(euler)
        if smooth_euler:
            euler = smooth(euler)
        seq.euler = euler

    # Write final sequence list to file
    with open(output_path, "wb") as fp:    # Pickling
        pickle.dump(seq_list, fp)
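# Hedged sketch of a command-line entry point for the pose script above. The flag names
# are illustrative; they simply mirror main()'s keyword arguments and are not part of
# the original file.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser('video_landmarks_euler')
    parser.add_argument('input', metavar='VIDEO', help='path to input video')
    parser.add_argument('-o', '--output', default=None, metavar='PATH', help='output path')
    parser.add_argument('-b', '--batch_size', default=16, type=int, help='inference batch size')
    parser.add_argument('--cpu_only', action='store_true', help='force CPU inference')
    args = parser.parse_args()
    main(args.input, args.output, cpu_only=args.cpu_only, batch_size=args.batch_size)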