def __call__(self, img, landmarks, bbox):
    """Crop an image and its landmarks around a bounding box.

    Depending on ``self.align`` the crop is produced either by ``align_crop`` or by a plain
    ``scale_bbox`` + ``crop_img`` combination.

    Args:
        img: Input image; it is copied into a new numpy array before processing.
        landmarks: Landmarks that are forwarded to the crop helpers.
        bbox: Bounding box describing the region of interest.

    Returns:
        tuple: ``(cropped image as a PIL image, cropped landmarks, bbox as passed in)``.
    """
    pixels = np.array(img).copy()

    if not self.align:
        # Plain crop around the scaled bounding box
        scaled = scale_bbox(bbox, self.bbox_scale, self.bbox_square)
        pixels, landmarks = crop_img(pixels, landmarks, scaled)
    else:
        pixels, landmarks = align_crop(pixels, landmarks, bbox, self.bbox_scale, self.bbox_square)

    return Image.fromarray(pixels), landmarks, bbox
def main(input_path, output_dir=None, cache_path=None, seq_postfix='_dsfd_seq.pkl', out_postfix='.jpg',
         resolution=256, crop_scale=1.2):
    """Crop every cached face sequence out of a single image and write the crops to disk.

    Args:
        input_path (str): Path to the input image.
        output_dir (str, optional): Directory for the cropped images. Defaults to the input path without
            its extension, in which case the directory is created if missing. An explicitly given
            directory must already exist.
        cache_path (str, optional): Path to the pickled sequence list. Defaults to the input path without
            its extension followed by ``seq_postfix``.
        seq_postfix (str): Postfix used to derive the default cache path.
        out_postfix (str): Postfix (including the extension) of the written image files.
        resolution (int): Width and height of the written crops in pixels.
        crop_scale (float): Scale passed to ``scale_bbox`` for each detection.

    Raises:
        RuntimeError: If the input image, the cache file or an explicitly given output directory does not
            exist, if the image cannot be read, or if a crop cannot be written.
    """
    input_stem = os.path.splitext(input_path)[0]
    if cache_path is None:
        cache_path = input_stem + seq_postfix
    use_default_dir = output_dir is None
    if use_default_dir:
        output_dir = input_stem

    # Verification (done before touching the file system so a failed run leaves nothing behind)
    if not os.path.isfile(input_path):
        raise RuntimeError('Input video does not exist: ' + input_path)
    if not os.path.isfile(cache_path):
        raise RuntimeError('Cache file does not exist: ' + cache_path)
    if use_default_dir:
        os.makedirs(output_dir, exist_ok=True)
    elif not os.path.isdir(output_dir):
        raise RuntimeError('Output directory does not exist: ' + output_dir)

    print('=> Cropping image sequences from image: "%s"...' % os.path.basename(input_path))

    # Load sequences from file
    # NOTE(review): pickle executes arbitrary code on load; only use cache files from a trusted source.
    with open(cache_path, "rb") as fp:  # Unpickling
        seq_list = pickle.load(fp)

    # Read image from file
    img = cv2.imread(input_path)
    if img is None:
        raise RuntimeError('Failed to read image: ' + input_path)

    input_name = os.path.splitext(os.path.basename(input_path))[0]

    # For each sequence
    for seq in seq_list:
        # Only the first detection of a sequence is relevant for a single image
        det = seq[0]

        # Crop image: the detection's last two values are made relative to its first two
        bbox = np.concatenate((det[:2], det[2:] - det[:2]))
        bbox = scale_bbox(bbox, crop_scale)
        img_cropped = crop_img(img, bbox)
        img_cropped = cv2.resize(img_cropped, (resolution, resolution), interpolation=cv2.INTER_CUBIC)

        # Write cropped image to file
        out_img_name = input_name + '_seq%02d%s' % (seq.id, out_postfix)
        out_img_path = os.path.join(output_dir, out_img_name)
        if not cv2.imwrite(out_img_path, img_cropped):
            raise RuntimeError('Failed to write image: ' + out_img_path)
def __call__(self, x):
    """
    Crop an image by its bounding box, recursing into nested sequences.

    Args:
        x: Either an (image, bounding box) pair, a list/tuple of further inputs, or any other object

    Returns:
        The cropped image for an (image, bounding box) pair, a list with the recursively processed
        elements for any other list/tuple, and ``x`` itself for everything else
    """
    if not isinstance(x, (list, tuple)):
        return x

    found_pair = len(x) == 2 and is_img(x[0]) and is_bbox(x[1])
    if not found_pair:
        return [self.__call__(item) for item in x]

    # Found image and bounding box pair
    img, bbox = x
    if self.det_format:
        # Make the last two values relative to the first two
        bbox = np.concatenate((bbox[:2], bbox[2:] - bbox[:2]))
    scaled = scale_bbox(bbox, self.bbox_scale, self.bbox_square)

    return crop_img(img, scaled, border=self.border_id, value=self.value)
def crop2img(img, crop, bbox):
    """Paste a crop back into a copy of the image at the location of its (scaled) bounding box.

    The bounding box is indexed as (left, top, width, height): index 0/2 are compared against the image
    width and index 1/3 against the image height. Parts of the box that fall outside the image are
    clipped, and only the matching part of the resized crop is written.

    Args:
        img (np.ndarray): Destination image (H x W [x C]); it is not modified.
        crop (np.ndarray): Crop to paste; it is resized to the size of the scaled bounding box.
        bbox: Bounding box that is passed through ``scale_bbox`` with its default arguments.

    Returns:
        np.ndarray: Copy of ``img`` with the resized crop pasted in.
    """
    scaled_bbox = scale_bbox(bbox)

    # cv2.resize expects dsize as (width, height), i.e. (bbox[2], bbox[3])
    box_size = (int(scaled_bbox[2]), int(scaled_bbox[3]))
    scaled_crop = cv2.resize(crop, box_size, interpolation=cv2.INTER_CUBIC)

    # Number of pixels by which the box exceeds each image border
    left = max(-scaled_bbox[0], 0)
    top = max(-scaled_bbox[1], 0)
    right = max(scaled_bbox[0] + scaled_bbox[2] - img.shape[1], 0)
    bottom = max(scaled_bbox[1] + scaled_bbox[3] - img.shape[0], 0)

    # Visible region in crop coordinates and the matching region in image coordinates.
    # A new array is built instead of "+=" so the array returned by scale_bbox is never mutated.
    crop_bbox = np.array([left, top, scaled_bbox[2] - left - right, scaled_bbox[3] - top - bottom])
    img_bbox = scaled_bbox + np.array([left, top, -left - right, -top - bottom])

    out_img = img.copy()
    out_img[img_bbox[1]:img_bbox[1] + img_bbox[3], img_bbox[0]:img_bbox[0] + img_bbox[2]] = \
        scaled_crop[crop_bbox[1]:crop_bbox[1] + crop_bbox[3], crop_bbox[0]:crop_bbox[0] + crop_bbox[2]]

    return out_img
def _write_batch(self, tensors):
    """Render every sample of a batch of tensors and advance the frame counter.

    Args:
        tensors: Sequence of batched tensors. The first dimension of ``tensors[0]`` is used as the batch
            size, and sample ``b`` of every tensor is passed to ``self.on_render``.
    """
    for sample_idx in range(tensors[0].shape[0]):
        frame_bgr = crop_bbox = None

        # The full input frame is only needed when output_crop was not specified
        if self._verbose == 0 and not self._output_crop:
            # Read frame from input video
            _, frame_bgr = self._in_vid.read()
            assert frame_bgr is not None, \
                f'Failed to read frame {self._frame_count} from input video: "{self._in_vid_path}"'

            # Get bounding box from sequence
            detection = self._seq[self._frame_count - self._seq.start_index]
            crop_bbox = np.concatenate((detection[:2], detection[2:] - detection[:2]))
            crop_bbox = scale_bbox(crop_bbox, self._crop_scale)

        rendered_bgr = self.on_render(*[t[sample_idx] for t in tensors])
        self._render(rendered_bgr, frame_bgr, crop_bbox)
        self._frame_count += 1
def process(self, img_list, bbox_list, landmarks_list=None):
    """Crop every image (and optionally its landmarks) in place, recursing into nested lists.

    Args:
        img_list (list): Images or nested lists/tuples of images; entries are replaced by their crops.
        bbox_list (list): Bounding boxes matching the structure of ``img_list``.
        landmarks_list (list, optional): Landmarks matching the structure of ``img_list``; entries are
            replaced by the landmarks returned from ``crop_img``.

    Returns:
        tuple: ``(img_list, landmarks_list)`` after processing.
    """
    with_landmarks = landmarks_list is not None

    for i, item in enumerate(img_list):
        if isinstance(item, (list, tuple)):
            # Nested sequence: process it recursively with the matching sub-lists
            if with_landmarks:
                img_list[i], landmarks_list[i] = self.process(item, bbox_list[i], landmarks_list[i])
            else:
                img_list[i], _ = self.process(item, bbox_list[i])
            continue

        bbox = bbox_list[i]
        if self.det_format:
            # Make the last two values relative to the first two
            bbox = np.concatenate((bbox[:2], bbox[2:] - bbox[:2]))
        bbox_scaled = scale_bbox(bbox, self.bbox_scale, self.bbox_square)

        if with_landmarks:
            img_list[i], landmarks_list[i] = crop_img(item, bbox_scaled, landmarks_list[i], self.border, self.value)
        else:
            img_list[i] = crop_img(item, bbox_scaled, border=self.border, value=self.value)

    return img_list, landmarks_list
def align_crop(img, landmarks, bbox, scale=2.0, square=True):
    """Rotate an image so that the eyes are level, then crop it around a scaled bounding box.

    The eye centers are taken as the means of landmark rows 36-41 and 42-47, the image is rotated around
    the point between them, and the landmarks are transformed with the same rotation matrix.

    Args:
        img (np.ndarray): Input image (H x W [x C]).
        landmarks (np.ndarray): 2D landmarks of shape (N, 2) with at least 48 rows.
        bbox: Bounding box that is scaled with ``scale_bbox`` and used for the crop.
        scale (float): Scale passed to ``scale_bbox``.
        square (bool): Square flag passed to ``scale_bbox``.

    Returns:
        tuple: ``(cropped rotated image, landmarks)`` as returned by ``crop_img``.
    """
    right_eye_center = landmarks[36:42, :].mean(axis=0)
    left_eye_center = landmarks[42:48, :].mean(axis=0)
    eye_center = (right_eye_center + left_eye_center) / 2.0

    # Angle of the line between the eyes
    dy = right_eye_center[1] - left_eye_center[1]
    dx = right_eye_center[0] - left_eye_center[0]
    angle = float(np.degrees(np.arctan2(dy, dx)) - 180)

    # Pass plain Python floats: numpy scalars (e.g. float32 means) may not be accepted as a point
    center = (float(eye_center[0]), float(eye_center[1]))
    M = cv2.getRotationMatrix2D(center, angle, 1.)
    output = cv2.warpAffine(img, M, (img.shape[1], img.shape[0]), flags=cv2.INTER_CUBIC)

    # Apply the same affine transform to the landmarks using homogeneous coordinates.
    # The column of ones is sized from the input instead of assuming exactly 68 landmarks.
    new_landmarks = np.concatenate((landmarks, np.ones((landmarks.shape[0], 1))), axis=1)
    new_landmarks = new_landmarks.dot(M.transpose())

    bbox_scaled = scale_bbox(bbox, scale, square)
    output, new_landmarks = crop_img(output, new_landmarks, bbox_scaled)

    return output, new_landmarks
def run(self):
    """ Main processing loop. Intended to be executed on a separate process.

    Tasks are read from ``self._input_queue`` while ``self._running`` is set. When no input video is
    open, the task is treated as an initialization task: its first three entries are the input video
    path, the sequence and the output video path (or None), and its fourth entry is a dict whose items
    are set as attributes on ``self``. Every other task is a sequence of batched tensors whose samples
    are rendered one by one. Once all frames have been written, the videos are released and ``True`` is
    put on ``self._reply_queue``.
    """
    while self._running:
        task = self._input_queue.get()

        # Initialize new video rendering task
        if self._in_vid is None:
            self._in_vid_path, self._seq, out_vid_path = task[:3]
            additional_attributes = task[3]
            self._frame_count = 0

            # Add additional arguments as members
            for attr_name, attr_val in additional_attributes.items():
                setattr(self, attr_name, attr_val)

            # Open input video
            self._in_vid = cv2.VideoCapture(self._in_vid_path)
            assert self._in_vid.isOpened(), f'Failed to open video: "{self._in_vid_path}"'

            in_total_frames = int(self._in_vid.get(cv2.CAP_PROP_FRAME_COUNT))
            fps = self._in_vid.get(cv2.CAP_PROP_FPS)
            in_vid_width = int(self._in_vid.get(cv2.CAP_PROP_FRAME_WIDTH))
            in_vid_height = int(self._in_vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
            self._total_frames = in_total_frames if self._verbose == 0 else len(self._seq)

            # Initialize output video
            if out_vid_path is not None:
                out_size = (in_vid_width, in_vid_height)
                if self._verbose <= 0 and self._output_crop:
                    out_size = (self._resolution, self._resolution)
                elif self._verbose_size is not None:
                    out_size = self._verbose_size
                self._out_vid = cv2.VideoWriter(out_vid_path, self._fourcc, fps, out_size)

            # Write frames as they are until the start of the sequence
            if self._verbose == 0:
                for i in range(self._seq.start_index):
                    # Read frame
                    ret, frame_bgr = self._in_vid.read()
                    assert frame_bgr is not None, f'Failed to read frame {i} from input video: "{self._in_vid_path}"'
                    self._render(frame_bgr)
                    self._frame_count += 1

            # The initialization task carries no tensors; wait for the first batch
            continue

        # Write a batch of frames
        tensors = task
        batch_size = tensors[0].shape[0]

        # For each frame in the current batch of tensors
        for b in range(batch_size):
            # Handle full frames if output_crop was not specified
            full_frame_bgr, bbox = None, None
            if self._verbose == 0 and not self._output_crop:
                # Read frame from input video
                ret, full_frame_bgr = self._in_vid.read()
                # Report the running frame counter: no loop index of the frame is in scope here
                assert full_frame_bgr is not None, \
                    f'Failed to read frame {self._frame_count} from input video: "{self._in_vid_path}"'

                # Get bounding box from sequence
                det = self._seq[self._frame_count - self._seq.start_index]
                bbox = np.concatenate((det[:2], det[2:] - det[:2]))
                bbox = scale_bbox(bbox, self._crop_scale)

            render_bgr = self.on_render(*[t[b] for t in tensors])
            self._render(render_bgr, full_frame_bgr, bbox)
            self._frame_count += 1

        # Check if we reached the end of the sequence, then write the remaining frames as they are
        if self._verbose == 0 and self._frame_count >= (self._seq.start_index + len(self._seq)):
            for i in range(self._seq.start_index + len(self._seq), self._total_frames):
                # Read frame
                ret, frame_bgr = self._in_vid.read()
                assert frame_bgr is not None, f'Failed to read frame {i} from input video: "{self._in_vid_path}"'
                self._render(frame_bgr)
                self._frame_count += 1

        # Check if all frames have been processed
        if self._frame_count >= self._total_frames:
            # Clean up
            self._in_vid.release()
            # The writer only exists when an output path was given for this task
            if self._out_vid is not None:
                self._out_vid.release()
            self._in_vid = None
            self._out_vid = None
            self._seq = None
            self._in_vid_path = None
            self._total_frames = None
            self._frame_count = 0

            # Notify job is finished
            self._reply_queue.put(True)
def extract_landmarks_bboxes_euler_from_images(img_dir, face_pose, face_align=None, img_size=(224, 224),
                                               scale=1.2, device=None, cache_file=None):
    """Extract landmarks, bounding boxes and euler angles for the main face of every image in a directory.

    Results are cached: if the cache file exists, its four pickled objects are returned directly and no
    image is processed. Otherwise all ``*.jpg`` files in ``img_dir`` are processed and the results are
    written to the cache file.

    Args:
        img_dir (str): Directory containing the ``*.jpg`` images.
        face_pose: Callable applied to the cropped and resized face tensor; its outputs are iterated and
            converted with ``.cpu().numpy()``.
        face_align (optional): Face alignment object providing ``face_detector.detect_from_image`` and
            ``get_landmarks``. A 2D ``face_alignment.FaceAlignment`` is created when omitted.
        img_size (tuple): Size the cropped face is resized to before pose estimation.
        scale (float): Scale passed to ``scale_bbox``.
        device (optional): Device the face tensor is moved to.
        cache_file (str, optional): Cache path. Defaults to ``img_dir + '.pkl'``.

    Returns:
        tuple: ``(frame_indices, landmarks, bboxes, eulers)``. Frame indices refer to positions in the
        globbed image list; images that cannot be read or contain no detected face are skipped.
    """
    if face_align is None:
        face_align = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, flip_input=True)
    if cache_file is None:
        cache_file = img_dir + '.pkl'

    # Cache hit: the four arrays were dumped one after another into the same file
    if os.path.exists(cache_file):
        with open(cache_file, "rb") as fp:
            frame_indices, landmarks, bboxes, eulers = [pickle.load(fp) for _ in range(4)]
        return frame_indices, landmarks, bboxes, eulers

    frame_indices, landmarks, bboxes, eulers = [], [], [], []
    img_paths = glob(os.path.join(img_dir, '*.jpg'))
    for i, img_path in tqdm(enumerate(img_paths), unit='images', total=len(img_paths)):
        img_bgr = cv2.imread(img_path)
        if img_bgr is None:
            continue
        img_rgb = img_bgr[:, :, ::-1]

        # Detect faces and keep only the main one
        faces = face_align.face_detector.detect_from_image(img_bgr.copy())
        if len(faces) == 0:
            continue
        curr_bbox = get_main_bbox(np.array(faces)[:, :4], img_bgr.shape[:2])
        curr_landmarks = face_align.get_landmarks(img_rgb, [curr_bbox])[0]

        # From here on the last two bbox values are stored relative to the first two (plus one)
        curr_bbox[2:] = curr_bbox[2:] - curr_bbox[:2] + 1

        # Crop and resize the face for pose estimation
        cropped_frame_rgb, cropped_landmarks = crop_img(img_rgb, curr_landmarks, scale_bbox(curr_bbox, scale))
        resized_pil = F.resize(Image.fromarray(cropped_frame_rgb), img_size, Image.BICUBIC)
        scaled_frame_rgb = np.array(resized_pil)
        scaled_frame_tensor = rgb2tensor(scaled_frame_rgb.copy()).to(device)

        pose_outputs = face_pose(scaled_frame_tensor)
        curr_euler = np.array([x.cpu().numpy() for x in pose_outputs])

        frame_indices.append(i)
        landmarks.append(curr_landmarks)
        bboxes.append(curr_bbox)
        eulers.append(curr_euler)

    frame_indices = np.array(frame_indices)
    landmarks = np.array(landmarks)
    bboxes = np.array(bboxes)
    eulers = np.array(eulers)

    # Write the results to the cache, one pickled object after another
    with open(cache_file, "wb") as fp:
        for result in (frame_indices, landmarks, bboxes, eulers):
            pickle.dump(result, fp)

    return frame_indices, landmarks, bboxes, eulers
def main(input_path, output_dir=None, cache_path=None, seq_postfix='_dsfd_seq.pkl', resolution=256, crop_scale=2.0,
         select='all', disable_tqdm=False):
    """Crop every cached face sequence out of a video into its own video and sequence file.

    For each selected sequence a ``<name>_seq<id>.mp4`` video with the cropped and resized frames and a
    ``<name>_seq<id><seq_postfix>`` pickle with the detections (and landmarks, if the sequence has them)
    expressed in crop coordinates are written to the output directory.

    Args:
        input_path (str): Path to the input video.
        output_dir (str, optional): Output directory. Defaults to the input path without its extension,
            in which case the directory is created if missing. An explicitly given directory must
            already exist.
        cache_path (str, optional): Path to the pickled sequence list. Defaults to the input path without
            its extension followed by ``seq_postfix``.
        seq_postfix (str): Postfix of the input and output sequence files.
        resolution (int): Width and height of the output videos in pixels.
        crop_scale (float): Scale passed to ``scale_bbox`` for each detection.
        select (str): ``'longest'`` keeps only the longest sequence (its id is reset to 0); any other
            value keeps all sequences.
        disable_tqdm (bool): If True, no progress bar is shown.

    Raises:
        RuntimeError: If the input video, the cache file or an explicitly given output directory does not
            exist, or if the video cannot be opened.
    """
    input_stem = os.path.splitext(input_path)[0]
    if cache_path is None:
        cache_path = input_stem + seq_postfix
    use_default_dir = output_dir is None
    if use_default_dir:
        output_dir = input_stem

    # Verification (done before touching the file system so a failed run leaves nothing behind)
    if not os.path.isfile(input_path):
        raise RuntimeError('Input video does not exist: ' + input_path)
    if not os.path.isfile(cache_path):
        raise RuntimeError('Cache file does not exist: ' + cache_path)
    if use_default_dir:
        os.makedirs(output_dir, exist_ok=True)
    elif not os.path.isdir(output_dir):
        raise RuntimeError('Output directory does not exist: ' + output_dir)

    print('=> Cropping video sequences from video: "%s"...' % os.path.basename(input_path))

    # Load sequences from file
    # NOTE(review): pickle executes arbitrary code on load; only use cache files from a trusted source.
    with open(cache_path, "rb") as fp:  # Unpickling
        seq_list = pickle.load(fp)

    # Select sequences
    if select == 'longest':
        selected_seq_index = np.argmax([len(s) for s in seq_list])
        seq = seq_list[selected_seq_index]
        seq.id = 0
        seq_list = [seq]

    input_name = os.path.splitext(os.path.basename(input_path))[0]
    cropped_detections = [[] for _ in seq_list]
    cropped_landmarks = [[] for _ in seq_list]

    # Open input video file; the capture and all writers are released even if processing fails
    cap = cv2.VideoCapture(input_path)
    out_vids = []
    try:
        if not cap.isOpened():
            raise RuntimeError('Failed to read video: ' + input_path)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)

        # For each sequence initialize output video file
        fourcc = cv2.VideoWriter_fourcc(*'avc1')
        for seq in seq_list:
            curr_vid_name = input_name + '_seq%02d.mp4' % seq.id
            curr_vid_path = os.path.join(output_dir, curr_vid_name)
            out_vids.append(cv2.VideoWriter(curr_vid_path, fourcc, fps, (resolution, resolution)))

        # For each frame in the target video
        pbar = range(total_frames) if disable_tqdm else tqdm(range(total_frames))
        for i in pbar:
            ret, frame = cap.read()
            if frame is None:
                continue

            # For each sequence
            for s, seq in enumerate(seq_list):
                # Skip sequences that do not cover the current frame
                if i < seq.start_index or (seq.start_index + len(seq) - 1) < i:
                    continue
                det = seq[i - seq.start_index]

                # Crop frame
                bbox = np.concatenate((det[:2], det[2:] - det[:2]))
                bbox = scale_bbox(bbox, crop_scale)
                frame_cropped = crop_img(frame, bbox)
                frame_cropped = cv2.resize(frame_cropped, (resolution, resolution), interpolation=cv2.INTER_CUBIC)

                # Write cropped frame to output video
                out_vids[s].write(frame_cropped)

                # Add cropped detection to list: move it into crop coordinates, then rescale it to the
                # output resolution (the loaded detection is updated in place)
                orig_size = bbox[2:]
                axes_scale = np.array([resolution, resolution]) / orig_size
                det[:2] -= bbox[:2]
                det[2:] -= bbox[:2]
                det[:2] *= axes_scale
                det[2:] *= axes_scale
                cropped_detections[s].append(det)

                # Add cropped landmarks to list
                if hasattr(seq, 'landmarks'):
                    curr_landmarks = seq.landmarks[i - seq.start_index]
                    curr_landmarks[:, :2] -= bbox[:2]

                    # 3D landmarks case: the third axis is scaled by the mean of the other two scales
                    if curr_landmarks.shape[1] == 3:
                        axes_scale = np.append(axes_scale, axes_scale.mean())

                    curr_landmarks *= axes_scale
                    cropped_landmarks[s].append(curr_landmarks)
    finally:
        cap.release()
        for out_vid in out_vids:
            out_vid.release()

    # For each sequence write cropped sequence to file
    for s, seq in enumerate(seq_list):
        # TODO: this is a hack to change class type (remove this later)
        out_seq = Sequence(0)
        out_seq.detections = np.array(cropped_detections[s])
        if hasattr(seq, 'landmarks'):
            out_seq.landmarks = np.array(cropped_landmarks[s])
        out_seq.id, out_seq.obj_id, out_seq.size_avg = seq.id, seq.obj_id, seq.size_avg

        # Write to file
        curr_out_name = input_name + '_seq%02d%s' % (out_seq.id, seq_postfix)
        curr_out_path = os.path.join(output_dir, curr_out_name)
        with open(curr_out_path, "wb") as fp:  # Pickling
            pickle.dump([out_seq], fp)
def main(input_path, output_path=None, seq_postfix='_dsfd_seq.pkl', output_postfix='_dsfd_seq_lms_euler.pkl',
         pose_model_path='weights/hopenet_robust_alpha1.pkl', smooth_det=False, smooth_euler=False, gpus=None,
         cpu_only=False, batch_size=16):
    """Estimate euler angles for every detection of every cached face sequence of a video.

    The sequences are loaded from ``<input without extension><seq_postfix>``, each detection is cropped
    from its frame and passed in batches through the pose model, and the resulting angles are stored in
    the ``euler`` attribute of each sequence. The updated sequence list is pickled to ``output_path``.

    Args:
        input_path (str): Path to the input video.
        output_path (str, optional): Output pickle path. Defaults to the input path without its extension
            followed by ``output_postfix``.
        seq_postfix (str): Postfix of the input sequence file.
        output_postfix (str): Postfix used to derive the default output path.
        pose_model_path (str): Path to the pose model weights.
        smooth_det (bool): If True, ``smooth()`` is called on every sequence before processing.
        smooth_euler (bool): If True, the euler angles of every sequence are passed through ``smooth``.
        gpus: GPU selection forwarded to ``set_device``.
        cpu_only (bool): If True, ``set_device`` is asked not to use the GPU.
        batch_size (int): Number of crops processed by the pose model at once.

    Raises:
        RuntimeError: If the video cannot be opened or a frame cannot be read.
    """
    input_stem = os.path.splitext(input_path)[0]
    cache_path = input_stem + seq_postfix
    if output_path is None:
        output_path = input_stem + output_postfix

    # Initialize device
    torch.set_grad_enabled(False)
    device, gpus = set_device(gpus, not cpu_only)

    # Load sequences from file
    # NOTE(review): pickle executes arbitrary code on load; only use cache files from a trusted source.
    with open(cache_path, "rb") as fp:  # Unpickling
        seq_list = pickle.load(fp)

    # Load pose model; map_location makes checkpoints saved on a GPU loadable on the selected device
    face_pose = Hopenet().to(device)
    checkpoint = torch.load(pose_model_path, map_location=device)
    face_pose.load_state_dict(checkpoint)
    face_pose.train(False)

    # Smooth sequence bounding boxes
    if smooth_det:
        for seq in seq_list:
            seq.smooth()

    # Open input video file; the capture and the progress bar are released even if processing fails
    cap = cv2.VideoCapture(input_path)
    pbar = None
    try:
        if not cap.isOpened():
            raise RuntimeError('Failed to read video: ' + input_path)

        # For each sequence
        total_detections = sum(len(s) for s in seq_list)
        pbar = tqdm(range(total_detections), unit='detections')
        for seq in seq_list:
            euler = []
            frame_cropped_tensor_list = []
            cap.set(cv2.CAP_PROP_POS_FRAMES, seq.start_index)

            # For each detection bounding box in the current sequence
            for i, det in enumerate(seq.detections):
                ret, frame_bgr = cap.read()
                if frame_bgr is None:
                    raise RuntimeError('Failed to read frame from video!')
                frame_rgb = frame_bgr[:, :, ::-1]

                # Crop frame
                bbox = np.concatenate((det[:2], det[2:] - det[:2]))
                bbox = scale_bbox(bbox, 1.2)
                frame_cropped_rgb = crop_img(frame_rgb, bbox)
                frame_cropped_rgb = cv2.resize(frame_cropped_rgb, (224, 224), interpolation=cv2.INTER_CUBIC)
                frame_cropped_tensor = rgb2tensor(frame_cropped_rgb).to(device)

                # Gather batches: keep collecting until the batch is full or the sequence ends
                frame_cropped_tensor_list.append(frame_cropped_tensor)
                if len(frame_cropped_tensor_list) < batch_size and (i + 1) < len(seq):
                    continue
                frame_cropped_tensor_batch = torch.cat(frame_cropped_tensor_list, dim=0)

                # Calculate euler angles
                curr_euler_batch = face_pose(frame_cropped_tensor_batch)  # Yaw, Pitch, Roll
                curr_euler_batch = curr_euler_batch.cpu().numpy()

                # Add the euler angles of every prediction in the batch to the list
                euler.extend(curr_euler_batch)

                # Clear lists
                frame_cropped_tensor_list.clear()
                pbar.update(len(frame_cropped_tensor_batch))

            # Add euler angles to sequence and optionally smooth them
            euler = np.array(euler)
            if smooth_euler:
                euler = smooth(euler)
            seq.euler = euler
    finally:
        cap.release()
        if pbar is not None:
            pbar.close()

    # Write final sequence list to file
    with open(output_path, "wb") as fp:  # Pickling
        pickle.dump(seq_list, fp)