def apply_transform(self, input: Tensor, params: Dict[str, Tensor], transform: Optional[Tensor] = None) -> Tensor:
    """Warp ``input`` to its own spatial size using the first two rows of ``transform``.

    Args:
        input: Tensor whose shape unpacks into exactly four dimensions; the
            last two are reused as the output size.
        params: Not read by this method.
        transform: Tensor indexed as ``transform[:, :2, :]``. It is declared
            optional, but the indexing below needs an actual tensor.

    Returns:
        The result of ``warp_affine`` for the input, the matrix slice and the
        ``resample`` / ``align_corners`` / ``padding_mode`` entries of
        ``self.flags``.
    """
    _, _, out_h, out_w = input.shape
    # ``cast`` only informs the type checker; it performs no runtime check.
    matrix = cast(Tensor, transform)[:, :2, :]
    flags = self.flags
    interpolation = flags["resample"].name.lower()
    return warp_affine(
        input,
        matrix,
        (out_h, out_w),
        interpolation,
        align_corners=flags["align_corners"],
        padding_mode=flags["padding_mode"].name.lower(),
    )
def _apply_affine(input: torch.Tensor, params: Dict[str, torch.Tensor], return_transform: bool = False) -> UnionType: if not torch.is_tensor(input): raise TypeError(f"Input type is not a torch.Tensor. Got {type(input)}") r"""Random affine transformation of the image keeping center invariant Args: input (torch.Tensor): Tensor to be transformed with shape (H, W), (C, H, W), (*, C, H, W). degrees (float or tuple): Range of degrees to select from. If degrees is a number instead of sequence like (min, max), the range of degrees will be (-degrees, +degrees). Set to 0 to deactivate rotations. translate (tuple, optional): tuple of maximum absolute fraction for horizontal and vertical translations. For example translate=(a, b), then horizontal shift is randomly sampled in the range -img_width * a < dx < img_width * a and vertical shift is randomly sampled in the range -img_height * b < dy < img_height * b. Will not translate by default. scale (tuple, optional): scaling factor interval, e.g (a, b), then scale is randomly sampled from the range a <= scale <= b. Will keep original scale by default. shear (sequence or float, optional): Range of degrees to select from. If shear is a number, a shear parallel to the x axis in the range (-shear, +shear) will be applied. Else if shear is a tuple or list of 2 values a shear parallel to the x axis in the range (shear[0], shear[1]) will be applied. Else if shear is a tuple or list of 4 values, a x-axis shear in (shear[0], shear[1]) and y-axis shear in (shear[2], shear[3]) will be applied. Will not apply shear by default return_transform (bool): if ``True`` return the matrix describing the transformation applied to each. Default: False. mode (str): interpolation mode to calculate output values 'bilinear' | 'nearest'. Default: 'bilinear'. padding_mode (str): padding mode for outside grid values 'zeros' | 'border' | 'reflection'. Default: 'zeros'. 
""" input = _transform_input(input) device: torch.device = input.device dtype: torch.dtype = input.dtype # arrange input data x_data: torch.Tensor = input.view(-1, *input.shape[-3:]) height, width = x_data.shape[-2:] transform: torch.Tensor = params['transform'].to(device, dtype) out_data: torch.Tensor = warp_affine(x_data, transform[:, :2, :], (height, width)) if return_transform: return out_data.view_as(input), transform return out_data.view_as(input)
def affine(tensor, matrix):
    """Apply an affine transformation to the image.

    Args:
        tensor (torch.Tensor): The image tensor to be warped. A 3-dimensional
            tensor is treated as unbatched: a leading batch dimension is added
            for the warp and removed again from the result.
        matrix (torch.Tensor): The affine transformation matrix. Either a
            single 2x3 matrix, which is shared across the whole batch, or a
            3-dimensional stack of matrices, which is passed through as is.

    Returns:
        Tensor: The warped image, with the same number of dimensions as the
        ``tensor`` argument.
    """
    is_unbatched = tensor.ndimension() == 3
    if is_unbatched:
        tensor = tensor.unsqueeze(0)

    if matrix.ndimension() == 2:
        # A bare 2x3 matrix has no batch axis, while the image handed to
        # warp_affine always has one here. Add the axis and broadcast the
        # matrix over the batch (expand creates a view, not a copy).
        matrix = matrix.unsqueeze(0).expand(tensor.size(0), -1, -1)

    # The output keeps the spatial size of the input.
    warped = warp_affine(tensor, matrix, tensor.size()[-2:])

    if is_unbatched:
        warped = warped.squeeze(0)
    return warped
def forward(self, front_proj, back_proj, left_proj, right_proj):
    """Warp the four projections with the tunable matrix and paste their crops onto blank canvases.

    The warp matrix is rebuilt on every call: its last column comes from
    ``self.M_tune`` and the remaining entries from a fixed identity block.
    Each warped projection is cropped and resized with ``crop_and_resize``
    and then written into the middle of a zero tensor shaped like the
    warped projection.

    Side effects:
        Overwrites ``self.M_fixed`` and ``self.M``.

    Args:
        front_proj: Tensor passed to ``warp_affine``; indexed with four
            dimensions after cropping.
        back_proj: Same handling as ``front_proj``.
        left_proj: Same handling as ``front_proj``.
        right_proj: Same handling as ``front_proj``.

    Returns:
        Tuple ``(front, back, left, right)`` of the zero-padded canvases.
    """
    # Create the constants on the same device as the tunable matrix so the
    # blend below does not mix devices once the module has been moved.
    device = self.M_tune.device
    self.M_fixed = torch.tensor([[1., 0., 0.], [0., 1., 0.]], requires_grad=False, device=device)
    mask = torch.tensor([[0, 0, 1], [0, 0, 1]], requires_grad=False, device=device)

    # mask selects the last column from M_tune; every other entry is taken
    # from the fixed identity block. The result gets a leading batch axis.
    self.M = (mask * self.M_tune + (1 - mask) * self.M_fixed).unsqueeze(0)

    out_size = (self.cols, self.rows)
    warped = [
        warp_affine(proj, self.M, out_size)
        for proj in (front_proj, back_proj, left_proj, right_proj)
    ]

    def _crop_onto_canvas(proj):
        """Crop/resize ``proj`` and write the patch into the middle of a zero canvas."""
        canvas = torch.zeros_like(proj)
        patch = crop_and_resize(
            proj, boxes=self.boxes, size=(self.rows - self.k, self.cols - self.k)
        )
        # Adding and subtracting self.n leaves the values (numerically) as
        # they were; presumably it ties self.n into the autograd graph -
        # TODO confirm.
        patch = patch + self.n - self.n

        # NOTE(review): both axes are centred on self.rows and sized by
        # patch.shape[2], so this assumes square data whose cropped side is
        # even - confirm against callers.
        centre = int(self.rows / 2)
        half = int(patch.shape[2] / 2)
        canvas[:, :, centre - half:centre + half, centre - half:centre + half] = patch
        return canvas

    front_out, back_out, left_out, right_out = (_crop_onto_canvas(proj) for proj in warped)
    return front_out, back_out, left_out, right_out
def main(arg):
    """Run the interactive boundary-model demo on a video file or the default camera.

    Every frame that is read is shown in the 'source' window. A frame is
    processed when ``arg.realtime`` is set or when 'c'/'C' is pressed: faces
    are detected with the dlib CNN detector and, for each face, heatmaps are
    produced, passed through the transformer and edge models, and decoded
    into an image. With ``arg.eval_visual`` the decoded image and the heatmap
    are displayed. Pressing 'q'/'Q' leaves the loop; the loop also ends when
    a frame cannot be read.

    Args:
        arg: Options object. The attributes read here are ``dataset``,
            ``normalized_bbox``, ``cuda``, ``eval_dataset_decoder``,
            ``eval_split_decoder``, ``eval_video_path``, ``realtime``,
            ``dlib_face_detector_path``, ``scale_ratio``, ``crop_size``,
            ``normalize_face_size``, ``normalize_top_shift`` and
            ``eval_visual``; ``arg`` is also forwarded to the project helpers.
    """
    devices = get_devices_list(arg)

    # load network
    print('***** ' + arg.dataset + ' boundary Model Evaluating *****')
    print('Loading network ...')
    estimator = create_model_estimator(arg, devices, eval=True)
    estimator.eval()
    if arg.normalized_bbox:
        regressor = create_model_regressor(arg, devices, eval=True)
        regressor.eval()
    edge = create_model_edge(arg, devices, eval=True)
    edge.eval()
    # The transformer is created once; a second identical creation whose
    # result was immediately overwritten has been dropped.
    transformer = create_model_transformer_a2b(arg, devices, eval=True)
    transformer.eval()
    decoder = create_model_decoder(arg, devices, eval=True)
    decoder.eval()
    print('Loading network done!\nStart testing ...')

    mean = torch.FloatTensor(
        means_color[arg.eval_dataset_decoder][arg.eval_split_decoder])
    std = torch.FloatTensor(
        stds_color[arg.eval_dataset_decoder][arg.eval_split_decoder])
    norm_min = (0 - mean) / std
    norm_max = (255 - mean) / std
    norm_range = norm_max - norm_min
    if arg.cuda:
        mean = mean.cuda(device=devices[0])
        std = std.cuda(device=devices[0])
        norm_min = norm_min.cuda(device=devices[0])
        norm_range = norm_range.cuda(device=devices[0])

    def _rescale_by_extrema(tensor):
        """Call ``rescale_0_1`` with the tensor's own minimum and value range."""
        low = torch.min(tensor)
        return rescale_0_1(tensor, low, torch.max(tensor) - low)

    # Built on first use and then reused: constructing the dlib CNN detector
    # for every processed frame is loop-invariant work.
    face_detector = None

    source = arg.eval_video_path if arg.eval_video_path is not None else 0
    cap = cv2.VideoCapture(source)

    try:
        # detect face and facial landmark
        while cap.isOpened():
            ret, img = cap.read()
            if not ret:
                # No frame could be read (e.g. the video file ended); leave
                # instead of spinning forever on failed reads.
                break

            show_img(img, 'source', wait=1, keep=True)
            k = cv2.waitKey(1)

            if arg.realtime or k == ord('c') or k == ord('C'):
                if face_detector is None:
                    face_detector = dlib.cnn_face_detection_model_v1(
                        arg.dlib_face_detector_path)
                rec = face_detector(img, 1)

                with torch.no_grad():
                    # Detections are consumed from the back of the list.
                    for _ in range(len(rec)):
                        face_rect = rec.pop().rect
                        height = face_rect.bottom() - face_rect.top()
                        width = face_rect.right() - face_rect.left()
                        # Grow the detected box by scale_ratio on every side.
                        bbox = [
                            int(face_rect.left() - arg.scale_ratio * width),
                            int(face_rect.top() - arg.scale_ratio * height),
                            int(face_rect.right() + arg.scale_ratio * width),
                            int(face_rect.bottom() + arg.scale_ratio * height),
                        ]

                        if arg.normalized_bbox:
                            coords, crop_matrix, inv_crop_matrix, heatmaps = detect_coords(
                                arg, img, bbox, arg.crop_size, estimator,
                                regressor, devices)
                            # Map every (x, y) pair through the inverse crop matrix.
                            for index in range(kp_num[arg.dataset]):
                                x, y = coords[2 * index], coords[2 * index + 1]
                                x_t, y_t = coord_transform((x, y), inv_crop_matrix)
                                coords[2 * index], coords[2 * index + 1] = x_t, y_t

                            inv_crop_matrix = torch.tensor(
                                np.float32(inv_crop_matrix[np.newaxis, :, :]))
                            if arg.cuda:
                                inv_crop_matrix = inv_crop_matrix.cuda(
                                    device=devices[0])
                            heatmaps = F.interpolate(
                                heatmaps, arg.crop_size, mode='bicubic')
                            # Warp the heatmaps to the size of the full frame.
                            heatmaps = warp_affine(
                                heatmaps, inv_crop_matrix,
                                (img.shape[0], img.shape[1]),
                                padding_mode='border')

                            norm_bbox = normalized_bbox(
                                coords, arg.dataset,
                                face_size=arg.normalize_face_size,
                                top_shift=arg.normalize_top_shift)
                            position_before = np.float32(
                                [[int(norm_bbox[0]), int(norm_bbox[1])],
                                 [int(norm_bbox[0]), int(norm_bbox[3])],
                                 [int(norm_bbox[2]), int(norm_bbox[3])]])
                            position_after = np.float32(
                                [[0, 0], [0, 63], [63, 63]])
                            crop_matrix = cv2.getAffineTransform(
                                position_before, position_after)
                            crop_matrix = torch.tensor(
                                np.float32(crop_matrix[np.newaxis, :, :]))
                            if arg.cuda:
                                crop_matrix = crop_matrix.cuda(device=devices[0])
                            # Re-crop the heatmaps to 64x64 around the normalized box.
                            heatmaps = warp_affine(
                                heatmaps, crop_matrix, (64, 64),
                                padding_mode='border')
                        else:
                            position_before = np.float32(
                                [[int(bbox[0]), int(bbox[1])],
                                 [int(bbox[0]), int(bbox[3])],
                                 [int(bbox[2]), int(bbox[3])]])
                            position_after = np.float32(
                                [[0, 0],
                                 [0, arg.crop_size - 1],
                                 [arg.crop_size - 1, arg.crop_size - 1]])
                            crop_matrix = cv2.getAffineTransform(
                                position_before, position_after)
                            face_img = cv2.warpAffine(
                                img, crop_matrix, (arg.crop_size, arg.crop_size))
                            face_gray = convert_img_to_gray(face_img)
                            face_norm = pic_normalize_gray(face_gray)
                            # Two leading axes are added before the estimator call.
                            input_face = torch.Tensor(face_norm)
                            input_face = input_face.unsqueeze(0).unsqueeze(0)
                            if arg.cuda:
                                input_face = input_face.cuda(device=devices[0])
                            # Only the last estimator output is used.
                            heatmaps = estimator(input_face)[-1]

                        # Rescale before and after each of the two models.
                        heatmaps = transformer(_rescale_by_extrema(heatmaps))
                        heatmaps = _rescale_by_extrema(heatmaps)
                        heatmaps = edge(heatmaps)
                        heatmaps = _rescale_by_extrema(heatmaps)

                        fake_image_norm = decoder(heatmaps).detach()
                        fake_image_denorm = derescale_0_1(
                            fake_image_norm, norm_min, norm_range)
                        fake_image = denormalize(
                            fake_image_denorm, mean, std).cpu().squeeze().numpy()
                        # Move the first axis last, clip to [0, 255], cast to
                        # uint8, then convert RGB -> BGR for OpenCV.
                        fake_image = np.uint8(
                            np.clip(np.moveaxis(fake_image, 0, -1), 0.0, 255.0))
                        fake_image = cv2.cvtColor(fake_image, cv2.COLOR_RGB2BGR)

                        if arg.eval_visual:
                            show_img(fake_image, 'target', wait=1, keep=True)
                            heatmap_show = get_heatmap_gray(
                                heatmaps).detach().cpu().numpy()
                            # Min/max-normalize to 0..255 and invert.
                            heatmap_show = (255 - np.uint8(
                                255 * (heatmap_show - np.min(heatmap_show))
                                / np.ptp(heatmap_show)))
                            heatmap_show = np.moveaxis(heatmap_show, 0, -1)
                            heatmap_show = cv2.resize(heatmap_show, (256, 256))
                            show_img(heatmap_show, 'heatmap', wait=1, keep=True)

            if k == ord('q') or k == ord('Q'):
                break
        print('QUIT.')
    finally:
        # Release the camera and close the windows even if processing raised.
        cap.release()
        cv2.destroyAllWindows()