def __call__(self, args):
    """
    Args:
        img (PIL Image): Image to be transformed.

    Returns:
        PIL Image: Affine transformed image.
    """
    img, M = args
    ret = self.get_params(self.degrees, self.translate, self.scale, self.shear, img.size)
    angle, translate, scale, shear = ret
    if not F._is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
    assert isinstance(translate, (tuple, list)) and len(translate) == 2, \
        "Argument translate should be a list or tuple of length 2"
    assert scale > 0.0, "Argument scale should be positive"

    output_size = img.size
    center = (img.size[0] * 0.5 + 0.5, img.size[1] * 0.5 + 0.5)
    matrix = F._get_inverse_affine_matrix(center, angle, translate, scale, shear)
    kwargs = {"fillcolor": self.fillcolor}
    img = img.transform(output_size, Image.AFFINE, matrix, self.resample, **kwargs)

    # Update the accumulated forward transformation matrix: PIL consumes the
    # inverse mapping, so promote the 6 coefficients to 3x3 and invert.
    inv_M = np.array([*matrix, 0, 0, 1]).reshape(3, 3)
    M = np.linalg.inv(inv_M) @ M
    return img, M
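# A minimal standalone sketch (NumPy only; the helper name below is
# illustrative, not from the snippet above) of the matrix bookkeeping used
# there: PIL's Image.transform() consumes the *inverse* mapping as 6
# coefficients, so the forward transform is recovered by promoting them to
# 3x3 and inverting.
import numpy as np

def forward_from_inverse_coeffs(coeffs):
    """coeffs: the 6 values returned by _get_inverse_affine_matrix."""
    inv_M = np.array([*coeffs, 0.0, 0.0, 1.0]).reshape(3, 3)
    return np.linalg.inv(inv_M)

# Identity coefficients recover the identity forward matrix:
assert np.allclose(forward_from_inverse_coeffs([1, 0, 0, 0, 1, 0]), np.eye(3))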
def affine_image_tensor(
    img: torch.Tensor,
    angle: float,
    translate: List[float],
    scale: float,
    shear: List[float],
    interpolation: InterpolationMode = InterpolationMode.NEAREST,
    fill: Optional[List[float]] = None,
    center: Optional[List[float]] = None,
) -> torch.Tensor:
    angle, translate, shear, center = _affine_parse_args(angle, translate, scale, shear, interpolation, center)

    center_f = [0.0, 0.0]
    if center is not None:
        _, height, width = get_dimensions_image_tensor(img)
        # Center values should be in pixel coordinates but translated such that (0, 0) corresponds to image center.
        center_f = [1.0 * (c - s * 0.5) for c, s in zip(center, [width, height])]

    translate_f = [1.0 * t for t in translate]
    matrix = _get_inverse_affine_matrix(center_f, angle, translate_f, scale, shear)
    return _FT.affine(img, matrix, interpolation=interpolation.value, fill=fill)
def __call__(self, file_a, file_b, label, mask):
    angle = float(torch.empty(1).uniform_(float(self.degrees[0]), float(self.degrees[1])).item())
    center_f = [0.0, 0.0]
    # rotate() expects the inverse affine matrix; the angle is negated to match
    # the sign convention of _get_inverse_affine_matrix.
    matrix = tf._get_inverse_affine_matrix(center_f, -angle, [0.0, 0.0], 1.0, [0.0, 0.0])
    return (F_t.rotate(file_a, matrix=matrix, resample=self.resample, expand=self.expand, fill=self.fill),
            F_t.rotate(file_b, matrix=matrix, resample=self.resample, expand=self.expand, fill=self.fill),
            F_t.rotate(label.unsqueeze(0), matrix=matrix, resample=self.resample,
                       expand=self.expand, fill=self.fill).squeeze(0),
            F_t.rotate(mask.unsqueeze(0), matrix=matrix, resample=self.resample,
                       expand=self.expand, fill=self.fill).squeeze(0))
def affine_image_pil(
    img: PIL.Image.Image,
    angle: float,
    translate: List[float],
    scale: float,
    shear: List[float],
    interpolation: InterpolationMode = InterpolationMode.NEAREST,
    fill: Optional[List[float]] = None,
    center: Optional[List[float]] = None,
) -> PIL.Image.Image:
    angle, translate, shear, center = _affine_parse_args(angle, translate, scale, shear, interpolation, center)

    # center = (img_size[0] * 0.5 + 0.5, img_size[1] * 0.5 + 0.5)
    # It is visually better to estimate the center without the 0.5 offset;
    # otherwise an image rotated by 90 degrees is shifted relative to the
    # output of torch.rot90 or F_t.affine.
    if center is None:
        _, height, width = get_dimensions_image_pil(img)
        center = [width * 0.5, height * 0.5]

    matrix = _get_inverse_affine_matrix(center, angle, translate, scale, shear)
    return _FP.affine(img, matrix, interpolation=pil_modes_mapping[interpolation], fill=fill)
def __call__(self, sample):
    image, boxes, labels = sample
    height = image.height
    width = image.width

    angle, translate, scale, shear = self.get_params(height, width)
    center = (width * 0.5 + 0.5, height * 0.5 + 0.5)
    coeffs = F._get_inverse_affine_matrix(center, angle, translate, scale, shear)
    inverse_affine_matrix = np.eye(3)
    # Fill in the first 2 rows of the 3x3 affine transformation matrix
    inverse_affine_matrix[:2] = np.array(coeffs).reshape(2, 3)

    if np.random.rand() < self.p_hflip:
        # Post-apply a horizontal flip:
        # pre-multiply by the [[-1, 0, width - 1], [0, 1, 0], [0, 0, 1]] matrix.
        flip_matrix = np.eye(3)
        flip_matrix[0, 0] = -1
        flip_matrix[0, 2] = width - 1
        # For the inverse affine matrix, pre-multiply by an inverse flip matrix
        # (a flip matrix is its own inverse).
        inverse_affine_matrix = flip_matrix @ inverse_affine_matrix

    image = image.transform((width, height), Image.AFFINE,
                            inverse_affine_matrix[:2].reshape(6), Image.BILINEAR)

    # Compute the forward affine transform matrix and apply it to the keypoints
    affine_matrix = np.linalg.pinv(inverse_affine_matrix)
    boxes, labels = apply_transform_and_clip(boxes, labels, affine_matrix, (width, height))
    return image, boxes, labels
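# A quick standalone check (NumPy only; the width value is made up) of the
# flip composition used above: the horizontal-flip matrix
# [[-1, 0, w-1], [0, 1, 0], [0, 0, 1]] is involutory, which is why the same
# matrix can be pre-multiplied onto the inverse affine matrix directly.
import numpy as np

width = 100
flip = np.eye(3)
flip[0, 0], flip[0, 2] = -1, width - 1
assert np.allclose(flip @ flip, np.eye(3))            # its own inverse
assert (flip @ np.array([0, 5, 1]))[0] == width - 1   # x=0 maps to x=width-1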
def rotate_image_tensor(
    img: torch.Tensor,
    angle: float,
    interpolation: InterpolationMode = InterpolationMode.NEAREST,
    expand: bool = False,
    fill: Optional[List[float]] = None,
    center: Optional[List[float]] = None,
) -> torch.Tensor:
    center_f = [0.0, 0.0]
    if center is not None:
        if expand:
            warnings.warn("The provided center argument has no effect on the result if expand is True")
        else:
            _, height, width = get_dimensions_image_tensor(img)
            # Center values should be in pixel coordinates but translated such that (0, 0) corresponds to image center.
            center_f = [1.0 * (c - s * 0.5) for c, s in zip(center, [width, height])]

    # due to current incoherence of rotation angle direction between affine and rotate implementations
    # we need to set -angle.
    matrix = _get_inverse_affine_matrix(center_f, -angle, [0.0, 0.0], 1.0, [0.0, 0.0])
    return _FT.rotate(img, matrix, interpolation=interpolation.value, expand=expand, fill=fill)
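# For reference, a similar rotation through torchvision's public functional
# API (an assumption that torchvision >= 0.9 is available; the snippet above
# is an internal helper, so this is an analogous call, not the same code path).
import torch
from torchvision.transforms import InterpolationMode
from torchvision.transforms.functional import rotate

img = torch.zeros(3, 32, 32)
out = rotate(img, angle=45.0, interpolation=InterpolationMode.BILINEAR, expand=True)
print(out.shape)  # spatial size grows when expand=True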
def __call__(self, sample):
    """
    Args:
        sample (dict): Sample with 'image' (PIL Image), 'labels' and 'warp_boxes'.

    Returns:
        dict: Sample with the affine-transformed image, labels and boxes.
    """
    img, labels = sample['image'], sample['labels']
    warp_boxes = sample['warp_boxes']
    ret = self.get_params(self.degrees, self.translate, self.scale, self.shear, img.size)
    img = TF.affine(img, *ret, resample=self.resample, fillcolor=self.fillcolor)
    labels = TF.affine(labels, *ret, resample=self.resample, fillcolor=self.fillcolor)

    orig_box = warp_boxes * 256. + 256.

    # Affine boxes
    center = (img.size[0] * 0.5 + 0.5, img.size[1] * 0.5 + 0.5)
    matrix = np.array(TF._get_inverse_affine_matrix(center, *ret)).reshape(2, 3)
    matrix = np.vstack([matrix, np.eye(3)[2]])
    assert matrix.shape == (3, 3)
    affine_trans = trans.AffineTransform(matrix=matrix)
    new_boxes = affine_trans.inverse(orig_box.reshape(-1, 2)) * (1. / 256.) - 1
    new_boxes = torch.from_numpy(new_boxes.reshape(-1, 4).astype(np.float32))
    sample.update({'image': img, 'labels': labels, 'warp_boxes': new_boxes})
    return sample
def affine_image_tensor(
    img: torch.Tensor,
    angle: float,
    translate: List[float],
    scale: float,
    shear: List[float],
    interpolation: InterpolationMode = InterpolationMode.NEAREST,
    fill: Optional[List[float]] = None,
    center: Optional[List[float]] = None,
) -> torch.Tensor:
    num_channels, height, width = img.shape[-3:]
    extra_dims = img.shape[:-3]
    img = img.view(-1, num_channels, height, width)

    angle, translate, shear, center = _affine_parse_args(angle, translate, scale, shear, interpolation, center)

    center_f = [0.0, 0.0]
    if center is not None:
        # Center values should be in pixel coordinates but translated such that (0, 0) corresponds to image center.
        center_f = [1.0 * (c - s * 0.5) for c, s in zip(center, [width, height])]

    translate_f = [1.0 * t for t in translate]
    matrix = _get_inverse_affine_matrix(center_f, angle, translate_f, scale, shear)

    output = _FT.affine(img, matrix, interpolation=interpolation.value, fill=fill)
    return output.view(extra_dims + (num_channels, height, width))
def affine_bounding_box(
    bounding_box: torch.Tensor,
    format: features.BoundingBoxFormat,
    image_size: Tuple[int, int],
    angle: float,
    translate: List[float],
    scale: float,
    shear: List[float],
    center: Optional[List[float]] = None,
) -> torch.Tensor:
    original_shape = bounding_box.shape
    bounding_box = convert_bounding_box_format(
        bounding_box, old_format=format, new_format=features.BoundingBoxFormat.XYXY).view(-1, 4)
    dtype = bounding_box.dtype if torch.is_floating_point(bounding_box) else torch.float32
    device = bounding_box.device

    if center is None:
        height, width = image_size
        center_f = [width * 0.5, height * 0.5]
    else:
        center_f = [float(c) for c in center]
    translate_f = [float(t) for t in translate]

    affine_matrix = torch.tensor(
        _get_inverse_affine_matrix(center_f, angle, translate_f, scale, shear, inverted=False),
        dtype=dtype,
        device=device,
    ).view(2, 3)

    # 1) Let's transform bboxes into a tensor of 4 points (top-left, top-right, bottom-left, bottom-right corners).
    # Tensor of points has shape (N * 4, 3), where N is the number of bboxes
    # Single point structure is similar to
    # [(xmin, ymin, 1), (xmax, ymin, 1), (xmax, ymax, 1), (xmin, ymax, 1)]
    points = bounding_box[:, [[0, 1], [2, 1], [2, 3], [0, 3]]].view(-1, 2)
    points = torch.cat([points, torch.ones(points.shape[0], 1, device=points.device)], dim=-1)
    # 2) Now let's transform the points using affine matrix
    transformed_points = torch.matmul(points, affine_matrix.T)
    # 3) Reshape transformed points to [N boxes, 4 points, x/y coords]
    # and compute bounding box from 4 transformed points:
    transformed_points = transformed_points.view(-1, 4, 2)
    out_bbox_mins, _ = torch.min(transformed_points, dim=1)
    out_bbox_maxs, _ = torch.max(transformed_points, dim=1)
    out_bboxes = torch.cat([out_bbox_mins, out_bbox_maxs], dim=1)
    # out_bboxes should be of shape [N boxes, 4]

    return convert_bounding_box_format(
        out_bboxes, old_format=features.BoundingBoxFormat.XYXY,
        new_format=format, copy=False).view(original_shape)
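# Self-contained sketch (pure PyTorch; the box values are made up) of steps
# 1)-3) above for a single XYXY box, using an identity affine matrix so the
# output box must equal the input box.
import torch

box = torch.tensor([[10.0, 20.0, 30.0, 40.0]])
affine = torch.eye(2, 3)  # identity 2x3 affine matrix
pts = box[:, [[0, 1], [2, 1], [2, 3], [0, 3]]].view(-1, 2)   # 4 corners
pts = torch.cat([pts, torch.ones(pts.shape[0], 1)], dim=-1)  # homogeneous coords
tpts = (pts @ affine.T).view(-1, 4, 2)                       # transform points
out = torch.cat([tpts.min(dim=1).values, tpts.max(dim=1).values], dim=1)
assert torch.equal(out, box)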
def __call__(self, file):
    file = torch.from_numpy(file)
    angle = float(torch.empty(1).uniform_(float(self.degrees[0]), float(self.degrees[1])).item())
    center_f = [0.0, 0.0]
    # rotate() expects the inverse affine matrix; the angle is negated to match
    # the sign convention of _get_inverse_affine_matrix.
    matrix = tf._get_inverse_affine_matrix(center_f, -angle, [0.0, 0.0], 1.0, [0.0, 0.0])
    return F_t.rotate(file, matrix=matrix, resample=self.resample, expand=self.expand, fill=self.fill)
def __init__(self, imageSize, shear=(0, 0), angle=0, translate=(0, 0), scale=0.9):
    center = (imageSize[0] * 0.5 + 0.5, imageSize[1] * 0.5 + 0.5)
    # shear = (np.random.uniform(-8, 8), np.random.uniform(-8, 8))
    # angle = np.random.uniform(-30, 30)
    self.invAffMat = _get_inverse_affine_matrix(center=center, angle=angle,
                                                translate=translate, scale=scale, shear=shear)
    invAffM = np.mat([[self.invAffMat[0], self.invAffMat[1], self.invAffMat[2]],
                      [self.invAffMat[3], self.invAffMat[4], self.invAffMat[5]],
                      [0, 0, 1]])
    affMat = np.linalg.inv(invAffM)
    self.affMat = (affMat.item(0), affMat.item(1), affMat.item(2),
                   affMat.item(3), affMat.item(4), affMat.item(5))
def __call__(self, item):
    img = item[0]
    tx = _get_inverse_affine_matrix((img.shape[0] // 2, img.shape[1] // 2),
                                    self.angle, (0, 0), self.scale, self.shear)
    M = np.array(tx)
    M = np.reshape(M, (2, 3))
    # Note: cv2.warpAffine expects dsize as (width, height); passing
    # (img.shape[0], img.shape[1]) is only correct for square images.
    return [cv2.warpAffine(x, M, dsize=(img.shape[0], img.shape[1])) for x in item]
def __call__(self, img):
    """
    Args:
        img (PIL Image): Image to be transformed.

    Returns:
        PIL Image: Affine transformed image.
    """
    params = self.get_params(self.degrees, self.translate, self.scale, self.shear, img.size)
    center = (img.size[0] * 0.5 + 0.5, img.size[1] * 0.5 + 0.5)
    self._matrix = _get_inverse_affine_matrix(center, *params)
    return F.affine(img, *params, resample=self.resample, fillcolor=self.fillcolor)
def __getitem__(self, index):
    """
    Args:
        index (int): Index

    Returns:
        tuple: (image, target) where target is index of the target class.
    """
    if self.train:
        img1, target = self.train_data[index], self.train_labels[index]
    else:
        img1, target = self.test_data[index], self.test_labels[index]

    # doing this so that it is consistent with all other datasets
    # to return a PIL Image
    img1 = Image.fromarray(img1)

    if self.transform_pre is not None:
        img1 = self.transform_pre(img1)

    # affine transformation on image2
    ret = self.get_params(self.degrees, self.translate, self.scale, self.shear, img1.size)
    output_size = img1.size
    center = (img1.size[0] * 0.5 + 0.5, img1.size[1] * 0.5 + 0.5)
    matrix = _get_inverse_affine_matrix(center, *ret)
    kwargs = {"fillcolor": self.fillcolor} if PILLOW_VERSION[0] == '5' else {}
    img2 = img1.transform(output_size, Image.AFFINE, matrix, self.resample, **kwargs)

    if self.transform is not None:
        img1 = self.transform(img1)
        img2 = self.transform(img2)
    if self.target_transform is not None:
        target = self.target_transform(target)

    # Encode the sampled affine parameters as a fixed-range feature vector:
    # cos/sin of the angle, translations rescaled by their sampling range,
    # and scale/shear mapped linearly onto [-1, 1] over their sampling ranges.
    aff_para = [math.cos(math.radians(ret[0])),
                math.sin(math.radians(ret[0])),
                ret[1][0] / self.translate[0] / output_size[0],
                ret[1][1] / self.translate[1] / output_size[1],
                ret[2] * 2. / (self.scale[1] - self.scale[0])
                - (self.scale[0] + self.scale[1]) / (self.scale[1] - self.scale[0]),
                ret[3] * 2. / (self.shear[1] - self.shear[0])
                - (self.shear[0] + self.shear[1]) / (self.shear[1] - self.shear[0])]
    aff_para = torch.from_numpy(np.array(aff_para, np.float32, copy=False))

    return img1, img2, aff_para, target
def _compute_extended_patch_size(
        w: float, h: float, rotation: float, scale: float,
        shear: List[float]) -> Tuple[float, float]:
    transform = np.concatenate([
        np.array(
            _get_inverse_affine_matrix(
                center=(0.5, 0.5),
                angle=rotation,
                translate=(0, 0),
                scale=scale,
                shear=shear,
            )).reshape(2, -1),
        np.array([[0.0, 0.0, 1.0]]),
    ])
    corners = np.array([[0, 0, 1], [0, h, 1], [w, 0, 1], [w, h, 1]])
    inv_corners = transform @ np.transpose(corners)
    xmax, ymax = inv_corners[:2].max(1)
    xmin, ymin = inv_corners[:2].min(1)
    return xmax - xmin, ymax - ymin
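# Standalone check of the idea behind _compute_extended_patch_size for a
# pure rotation (no scale/shear): the axis-aligned bound of a rotated
# w x h rectangle is (w|cos a| + h|sin a|, w|sin a| + h|cos a|).
# The helper name and values here are illustrative.
import math

def rotated_extent(w, h, degrees):
    a = math.radians(degrees)
    return (w * abs(math.cos(a)) + h * abs(math.sin(a)),
            w * abs(math.sin(a)) + h * abs(math.cos(a)))

print(rotated_extent(100, 50, 30))  # ~(111.6, 93.3)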
def _test_transformation(a, t, s, sh):
    a_rad = math.radians(a)
    s_rad = math.radians(sh)
    # 1) Check transformation matrix:
    c_matrix = np.array([[1.0, 0.0, cnt[0]], [0.0, 1.0, cnt[1]], [0.0, 0.0, 1.0]])
    c_inv_matrix = np.linalg.inv(c_matrix)
    t_matrix = np.array([[1.0, 0.0, t[0]], [0.0, 1.0, t[1]], [0.0, 0.0, 1.0]])
    r_matrix = np.array([[s * math.cos(a_rad), -s * math.sin(a_rad + s_rad), 0.0],
                         [s * math.sin(a_rad), s * math.cos(a_rad + s_rad), 0.0],
                         [0.0, 0.0, 1.0]])
    true_matrix = np.dot(t_matrix, np.dot(c_matrix, np.dot(r_matrix, c_inv_matrix)))
    result_matrix = _to_3x3_inv(F._get_inverse_affine_matrix(center=cnt, angle=a,
                                                             translate=t, scale=s, shear=sh))
    assert np.sum(np.abs(true_matrix - result_matrix)) < 1e-10
    # 2) Perform inverse mapping:
    true_result = np.zeros((200, 200, 3), dtype=np.uint8)
    inv_true_matrix = np.linalg.inv(true_matrix)
    for y in range(true_result.shape[0]):
        for x in range(true_result.shape[1]):
            res = np.dot(inv_true_matrix, [x, y, 1])
            _x = int(res[0] + 0.5)
            _y = int(res[1] + 0.5)
            if 0 <= _x < input_img.shape[1] and 0 <= _y < input_img.shape[0]:
                true_result[y, x, :] = input_img[_y, _x, :]
    result = F.affine(pil_img, angle=a, translate=t, scale=s, shear=sh)
    assert result.size == pil_img.size
    # Compute the number of differing pixels:
    np_result = np.array(result)
    n_diff_pixels = np.sum(np_result != true_result) / 3
    # Accept up to 3 wrong pixels
    assert n_diff_pixels < 3, \
        "a={}, t={}, s={}, sh={}\n".format(a, t, s, sh) + \
        "n diff pixels={}\n".format(np.sum(np.array(result)[:, :, 0] != true_result[:, :, 0]))
def _affine_bounding_box_xyxy(
    bounding_box: torch.Tensor,
    image_size: Tuple[int, int],
    angle: float,
    translate: Optional[List[float]] = None,
    scale: Optional[float] = None,
    shear: Optional[List[float]] = None,
    center: Optional[List[float]] = None,
    expand: bool = False,
) -> torch.Tensor:
    dtype = bounding_box.dtype if torch.is_floating_point(bounding_box) else torch.float32
    device = bounding_box.device

    if translate is None:
        translate = [0.0, 0.0]
    if scale is None:
        scale = 1.0
    if shear is None:
        shear = [0.0, 0.0]
    if center is None:
        height, width = image_size
        center_f = [width * 0.5, height * 0.5]
    else:
        center_f = [float(c) for c in center]
    translate_f = [float(t) for t in translate]

    affine_matrix = torch.tensor(
        _get_inverse_affine_matrix(center_f, angle, translate_f, scale, shear, inverted=False),
        dtype=dtype,
        device=device,
    ).view(2, 3)

    # 1) Let's transform bboxes into a tensor of 4 points (top-left, top-right, bottom-left, bottom-right corners).
    # Tensor of points has shape (N * 4, 3), where N is the number of bboxes
    # Single point structure is similar to
    # [(xmin, ymin, 1), (xmax, ymin, 1), (xmax, ymax, 1), (xmin, ymax, 1)]
    points = bounding_box[:, [[0, 1], [2, 1], [2, 3], [0, 3]]].view(-1, 2)
    points = torch.cat([points, torch.ones(points.shape[0], 1, device=points.device)], dim=-1)
    # 2) Now let's transform the points using affine matrix
    transformed_points = torch.matmul(points, affine_matrix.T)
    # 3) Reshape transformed points to [N boxes, 4 points, x/y coords]
    # and compute bounding box from 4 transformed points:
    transformed_points = transformed_points.view(-1, 4, 2)
    out_bbox_mins, _ = torch.min(transformed_points, dim=1)
    out_bbox_maxs, _ = torch.max(transformed_points, dim=1)
    out_bboxes = torch.cat([out_bbox_mins, out_bbox_maxs], dim=1)

    if expand:
        # Compute minimum point for transformed image frame:
        # Points are Top-Left, Top-Right, Bottom-Left, Bottom-Right points.
        height, width = image_size
        points = torch.tensor(
            [
                [0.0, 0.0, 1.0],
                [0.0, 1.0 * height, 1.0],
                [1.0 * width, 1.0 * height, 1.0],
                [1.0 * width, 0.0, 1.0],
            ],
            dtype=dtype,
            device=device,
        )
        new_points = torch.matmul(points, affine_matrix.T)
        tr, _ = torch.min(new_points, dim=0, keepdim=True)
        # Translate bounding boxes
        out_bboxes[:, 0::2] = out_bboxes[:, 0::2] - tr[:, 0]
        out_bboxes[:, 1::2] = out_bboxes[:, 1::2] - tr[:, 1]

    return out_bboxes
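# Sketch of the expand step above (pure PyTorch; the sizes and angle are
# illustrative): after mapping the image frame's corners, the per-axis
# minimum gives the offset that shifts boxes into the expanded canvas.
import math
import torch

h, w, a = 100.0, 200.0, math.radians(90.0)
rot = torch.tensor([[math.cos(a), -math.sin(a), 0.0],
                    [math.sin(a),  math.cos(a), 0.0]])
corners = torch.tensor([[0.0, 0.0, 1.0], [0.0, h, 1.0],
                        [w, h, 1.0], [w, 0.0, 1.0]])
tr = (corners @ rot.T).min(dim=0).values
# A 90-degree rotation about the origin lands the frame at x in [-h, 0]:
assert torch.allclose(tr, torch.tensor([-h, 0.0]), atol=1e-4)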
def __getitem__(self, idx):  # pylint: disable=too-many-locals
    # Sample a random transformation
    rotation = np.random.uniform(-self._max_rotation_jitter, self._max_rotation_jitter)
    scale = np.exp(np.random.uniform(-self._max_scale_jitter, self._max_scale_jitter))
    shear = np.random.uniform(-self._max_shear_jitter, self._max_shear_jitter, size=2)

    # Compute the "extended" patch size. This is the size of the patch that
    # we will first transform and then center crop to the final size.
    extpatch_w, extpatch_h = self._compute_extended_patch_size(
        w=self._patch_w, h=self._patch_h, rotation=rotation, scale=scale, shear=shear)

    # The slide may not be large enough for the extended patch size. In
    # this case, we will downscale the target patch size until the extended
    # patch size fits.
    adjmul = min(1.0, self._slide.W / extpatch_w, self._slide.H / extpatch_h)
    extpatch_w = min(int(np.ceil(extpatch_w * adjmul)), self._slide.W)
    extpatch_h = min(int(np.ceil(extpatch_h * adjmul)), self._slide.H)
    patch_w = int(self._patch_w * adjmul)
    patch_h = int(self._patch_h * adjmul)

    # Extract the extended patch by sampling uniformly from the size of the slide
    x, y = [np.random.randint(a - b + 1)
            for a, b in zip((self._slide.W, self._slide.H), (extpatch_w, extpatch_h))]
    image = self._slide.image[y:y + extpatch_h, x:x + extpatch_w]
    image = (255 * (image + 1) / 2).astype(np.uint8)
    image = to_pil_image(image)
    label = to_pil_image(self._slide.label[y:y + extpatch_h, x:x + extpatch_w])

    # Apply augmentations
    output_size = (max(extpatch_w, patch_w), max(extpatch_h, patch_h))
    transformation = _get_inverse_affine_matrix(
        center=(image.size[0] * 0.5, image.size[1] * 0.5),
        angle=rotation,
        translate=[(a - b) / 2 for a, b in zip(output_size, image.size)],
        scale=scale,
        shear=shear,
    )
    image = self.image_augmentation(image)
    image = np.array(image.transform(output_size, Image.AFFINE, transformation,
                                     resample=Image.BILINEAR))
    image = center_crop(image, (patch_h, patch_w))
    label = np.array(label.transform(output_size, Image.AFFINE, transformation,
                                     resample=Image.NEAREST))
    label = center_crop(label, (patch_h, patch_w))
    if np.random.rand() < 0.5:
        image = np.flip(image, 0).copy()
        label = np.flip(label, 0).copy()

    # Convert image to the correct data format (float32 in [-1, 1] and in CHW order)
    image = 2 * image.astype(np.float32) / 255 - 1
    image = image.transpose(2, 0, 1)

    return self._slide.prepare_data(image, label)
def __getitem__(self, index):
    """
    Args:
        index (int): Index

    Returns:
        tuple: (image, target) where target is index of the target class.
    """
    train_path = self.trainlist[index]
    aim_path = self.aimlist[index]
    img1 = np.load(train_path)
    target = np.load(aim_path)

    # doing this so that it is consistent with all other datasets
    img1 = img1.transpose((1, 2, 0))  # change 3*32*32 to 32*32*3
    img1 = self.normalise(img1, self.dataset_mean, self.dataset_std)  # normalize, channel-last format
    img1 = img1.transpose((2, 0, 1))  # change back to 3*32*32

    if self.transform_pre is not None:  # operations should be channel-first
        self.transform_now = TransformTwice(self.transform_pre)
        img1, img1_another = self.transform_now(img1)

    # Recover a PIL image from the normalized array to apply the warps below.
    img1_transform = img1.transpose((1, 2, 0))
    img1_transform = self.denormalize(img1_transform, self.dataset_mean, self.dataset_std)
    img1_transform = Image.fromarray(img1_transform.astype(np.uint8))

    # projective transformation on image2
    width, height = img1_transform.size
    center = (img1_transform.size[0] * 0.5 + 0.5, img1_transform.size[1] * 0.5 + 0.5)
    shift = [float(random.randint(-int(self.shift), int(self.shift))) for ii in range(8)]
    scale = random.uniform(self.scale[0], self.scale[1])
    rotation = random.randint(0, 3)
    pts = [((0 - center[0]) * scale + center[0], (0 - center[1]) * scale + center[1]),
           ((width - center[0]) * scale + center[0], (0 - center[1]) * scale + center[1]),
           ((width - center[0]) * scale + center[0], (height - center[1]) * scale + center[1]),
           ((0 - center[0]) * scale + center[0], (height - center[1]) * scale + center[1])]
    pts = [pts[(ii + rotation) % 4] for ii in range(4)]
    pts = [(pts[ii][0] + shift[2 * ii], pts[ii][1] + shift[2 * ii + 1]) for ii in range(4)]
    coeffs = self.find_coeffs(pts, [(0, 0), (width, 0), (width, height), (0, height)])
    kwargs = {"fillcolor": self.fillcolor} if PILLOW_VERSION[0] == '5' else {}
    img2 = img1_transform.transform((width, height), Image.PERSPECTIVE, coeffs, self.resample, **kwargs)
    img2 = np.array(img2).astype('float32')
    img2 = self.normalise(img2, self.dataset_mean, self.dataset_std)
    img2 = img2.transpose((2, 0, 1))

    # apply affine transformation here
    ret = self.get_params(self.degrees, self.translate, self.scale, self.shear, img1_transform.size)
    output_size = img1_transform.size  # 32*32
    center = (img1_transform.size[0] * 0.5 + 0.5, img1_transform.size[1] * 0.5 + 0.5)
    matrix = _get_inverse_affine_matrix(center, *ret)
    kwargs = {"fillcolor": self.fillcolor} if PILLOW_VERSION[0] == '5' else {}
    img3 = img1_transform.transform(output_size, Image.AFFINE, matrix, self.resample, **kwargs)
    img3 = np.array(img3).astype('float32')
    img3 = self.normalise(img3, self.dataset_mean, self.dataset_std)
    img3 = img3.transpose((2, 0, 1))

    aff_para = [math.cos(math.radians(ret[0])),  # degree cos
                math.sin(math.radians(ret[0])),  # degree sin
                ret[1][0] / self.translate[0] / output_size[0],  # translate x
                ret[1][1] / self.translate[1] / output_size[1],  # translate y
                ret[2] * 2. / (self.scale[1] - self.scale[0])
                - (self.scale[0] + self.scale[1]) / (self.scale[1] - self.scale[0]),  # scale
                ret[3] * 2. / (self.shear[1] - self.shear[0])
                - (self.shear[0] + self.shear[1]) / (self.shear[1] - self.shear[0])]  # shear
    aff_para = torch.from_numpy(np.array(aff_para, np.float32, copy=False))  # affine transform parameters

    # apply similarity transformation (same angle/translation/scale, no shear)
    matrix = _get_inverse_affine_matrix(center, ret[0], ret[1], ret[2], 0)
    kwargs = {"fillcolor": self.fillcolor} if PILLOW_VERSION[0] == '5' else {}
    img4 = img1_transform.transform(output_size, Image.AFFINE, matrix, self.resample, **kwargs)
    img4 = np.array(img4).astype('float32')
    img4 = self.normalise(img4, self.dataset_mean, self.dataset_std)
    img4 = img4.transpose((2, 0, 1))

    # apply Euclidean transformation (no shear, unit scale)
    matrix = _get_inverse_affine_matrix(center, ret[0], ret[1], 1.0, 0)
    kwargs = {"fillcolor": self.fillcolor} if PILLOW_VERSION[0] == '5' else {}
    img5 = img1_transform.transform(output_size, Image.AFFINE, matrix, self.resample, **kwargs)
    img5 = np.array(img5).astype('float32')
    img5 = self.normalise(img5, self.dataset_mean, self.dataset_std)
    img5 = img5.transpose((2, 0, 1))

    # apply colorize, contrast, brightness and sharpness to the image
    img6, oper_params = self.operate_CCBS(img1_transform)
    img6 = np.array(img6).astype('float32')
    img6 = self.normalise(img6, self.dataset_mean, self.dataset_std)
    img6 = img6.transpose((2, 0, 1))

    # add another image with cutout
    img7 = np.array(img1_transform).astype('float32')
    img7 = self.normalise(img7, self.dataset_mean, self.dataset_std)
    img7 = img7.transpose((2, 0, 1))
    img7 = self.cut_out(img7)

    img1_transform = np.array(img1_transform).astype('float32')

    if self.transform is not None:
        img1 = self.transform(img1)
        img1_transform = self.transform(img1_transform)
        img2 = self.transform(img2)
        img3 = self.transform(img3)
        img4 = self.transform(img4)
        img5 = self.transform(img5)
        img6 = self.transform(img6)
        img7 = self.transform(img7)
    if self.target_transform is not None:
        target = self.target_transform(target)

    img1 = torch.from_numpy(img1)
    img2 = torch.from_numpy(img2)
    img3 = torch.from_numpy(img3)
    img4 = torch.from_numpy(img4)
    img5 = torch.from_numpy(img5)
    img6 = torch.from_numpy(img6)
    img7 = torch.from_numpy(img7)
    img1_transform = torch.from_numpy(img1_transform)
    coeffs = torch.from_numpy(np.array(coeffs, np.float32, copy=False)).view(8, 1, 1)
    oper_params = torch.from_numpy(oper_params)
    if self.matrix_transform is not None:
        coeffs = self.matrix_transform(coeffs)

    if self.transform is not None:
        img1_another = self.transform(img1_another)
    if self.transform_pre is not None:
        img1_another = torch.from_numpy(img1_another)
        return (img1, img1_another), img2, img3, img4, img5, img6, img7, \
            aff_para, coeffs, oper_params, target
    else:
        return (img1, img1), img2, img3, img4, img5, img6, img7, \
            aff_para, coeffs, oper_params, target
def __getitem__(self, i):
    assert (type(i) is int)

    p = self.projs[i, :, :, :]

    s = None
    if self.segs is not None:
        s = self.segs[i, :, :, :]

    cur_lands = None
    if self.lands is not None:
        # we need a deep copy here because of possible data aug
        cur_lands = self.lands[i, :, :].clone()

    need_to_pad_proj = self.extra_pad > 0

    if (self.prob_of_aug > 0) and (random.random() < self.prob_of_aug):
        if self.do_invert and (random.random() < 0.5):
            p_max = p.max()
            p = p_max - p
            if self.print_aug_info:
                print('inverting')

        if self.do_noise:
            # normalize to [0,1] to apply noise
            p_min = p.min()
            p_max = p.max()
            p = (p - p_min) / (p_max - p_min)
            cur_noise_sigma = random.uniform(0.005, 0.01)
            p += torch.randn(p.shape) * cur_noise_sigma
            p = (p * (p_max - p_min)) + p_min
            if self.print_aug_info:
                print('noise sigma: {:.3f}'.format(cur_noise_sigma))

        if self.do_gamma:
            # normalize to [0,1] to apply gamma
            p_min = p.min()
            p_max = p.max()
            p = (p - p_min) / (p_max - p_min)
            gamma = random.uniform(0.7, 1.3)
            p.pow_(gamma)
            p = (p * (p_max - p_min)) + p_min
            if self.print_aug_info:
                print('gamma = {:.2f}'.format(gamma))

        if self.do_affine:
            # data needs to be in [0,1] for PIL functions
            p_min = p.min()
            p_max = p.max()
            p = (p - p_min) / (p_max - p_min)

            orig_p_shape = p.shape
            if self.pad_data_for_affine:
                pad1 = int(math.ceil(orig_p_shape[1] / 2.0))
                pad2 = int(math.ceil(orig_p_shape[2] / 2.0))
                if need_to_pad_proj:
                    pad1 += self.extra_pad
                    pad2 += self.extra_pad
                    need_to_pad_proj = False
                p = torch.from_numpy(np.pad(p.numpy(), ((0, 0), (pad1, pad1), (pad2, pad2)), 'reflect'))

            # this uniformly samples the direction
            rand_trans = torch.randn(2)
            rand_trans /= rand_trans.norm()
            # now uniformly sample the magnitude
            rand_trans *= random.random() * 20

            rot_ang = random.uniform(-5, 5)
            trans_x = rand_trans[0]
            trans_y = rand_trans[1]
            shear = random.uniform(-2, 2)
            scale_factor = random.uniform(0.9, 1.1)

            if self.print_aug_info:
                print('Rot: {:.2f}'.format(rot_ang))
                print('Trans X: {:.2f} , Trans Y: {:.2f}'.format(trans_x, trans_y))
                print('Shear: {:.2f}'.format(shear))
                print('Scale: {:.2f}'.format(scale_factor))

            p = TF.to_tensor(TF.affine(TF.to_pil_image(p), rot_ang, (trans_x, trans_y),
                                       scale_factor, shear, resample=PIL.Image.BILINEAR))

            if self.pad_data_for_affine:
                # pad can be zero
                pad_shape = (orig_p_shape[-2] + (2 * self.extra_pad),
                             orig_p_shape[-1] + (2 * self.extra_pad))
                p = center_crop(p, pad_shape)

            p = (p * (p_max - p_min)) + p_min

            if s is not None:
                orig_s_shape = s.shape
                if self.pad_data_for_affine:
                    pad1 = int(math.ceil(orig_s_shape[1] / 2.0))
                    pad2 = int(math.ceil(orig_s_shape[2] / 2.0))
                    s = torch.from_numpy(np.pad(s.numpy(), ((0, 0), (pad1, pad1), (pad2, pad2)), 'reflect'))

                # warp each class separately, I don't want any wacky color
                # spaces assumed by PIL
                for c in range(s.shape[0]):
                    s[c, :, :] = TF.to_tensor(TF.affine(TF.to_pil_image(s[c, :, :]),
                                                        rot_ang, (trans_x, trans_y),
                                                        scale_factor, shear))

                if self.pad_data_for_affine:
                    s = center_crop(s, orig_s_shape)

            if cur_lands is not None:
                shape_for_center_of_rot = s.shape if s is not None else p.shape
                center_of_rot = ((shape_for_center_of_rot[-2] / 2.0) + 0.5,
                                 (shape_for_center_of_rot[-1] / 2.0) + 0.5)
                A_inv = TF._get_inverse_affine_matrix(center_of_rot, rot_ang,
                                                      (trans_x, trans_y), scale_factor, shear)
                A = np.matrix([[A_inv[0], A_inv[1], A_inv[2]],
                               [A_inv[3], A_inv[4], A_inv[5]],
                               [0, 0, 1]]).I

                for pt_idx in range(cur_lands.shape[-1]):
                    cur_land = cur_lands[:, pt_idx]
                    if (not math.isinf(cur_land[0])) and (not math.isinf(cur_land[1])):
                        tmp_pt = A * np.asmatrix(np.pad(cur_land.numpy(), (0, 1),
                                                        mode='constant',
                                                        constant_values=1).reshape(3, 1))
                        xform_l = torch.from_numpy(np.squeeze(np.asarray(tmp_pt))[0:2])
                        # mark landmarks warped outside the image as invalid
                        # (the final comparison was "<" in the original, which
                        # contradicts the out-of-bounds check; fixed to ">")
                        if (s is not None) and \
                           ((xform_l[0] < 0) or (xform_l[0] > (orig_s_shape[1] - 1)) or
                                (xform_l[1] < 0) or (xform_l[1] > (orig_s_shape[0] - 1))):
                            xform_l[0] = math.inf
                            xform_l[1] = math.inf
                        cur_lands[:, pt_idx] = xform_l

        if self.do_erase and (random.random() < self.erase_prob):
            p_2d_shape = [p.shape[-2], p.shape[-1]]
            box_mean_dim = torch.Tensor([p_2d_shape[0] * 0.15, p_2d_shape[1] * 0.15])
            num_boxes = random.randint(1, 5)
            if self.print_aug_info:
                print('  Random Corrupt: num. boxes: {}'.format(num_boxes))
            for box_idx in range(num_boxes):
                box_valid = False
                while not box_valid:
                    # First sample box dims
                    box_dims = torch.round((torch.randn(2) * box_mean_dim) + box_mean_dim).long()
                    if (box_dims[0] > 0) and (box_dims[1] > 0) and \
                       (box_dims[0] <= p_2d_shape[0]) and (box_dims[1] <= p_2d_shape[1]):
                        # Next sample box location
                        start_row = random.randint(0, p_2d_shape[0] - box_dims[0])
                        start_col = random.randint(0, p_2d_shape[1] - box_dims[1])
                        box_valid = True
                p_roi = p[0, start_row:(start_row + box_dims[0]),
                          start_col:(start_col + box_dims[1])]
                sigma_noise = (p_roi.max() - p_roi.min()) * 0.2
                p_roi += torch.randn(p_roi.shape) * sigma_noise
    # end data aug

    if need_to_pad_proj:
        p = torch.from_numpy(np.pad(p.numpy(),
                                    ((0, 0),
                                     (self.extra_pad, self.extra_pad),
                                     (self.extra_pad, self.extra_pad)),
                                    'reflect'))

    if self.do_norm_01_scale:
        p = (p - p.mean()) / p.std()

    h = None
    if self.include_heat_map:
        assert (s is not None)
        assert (cur_lands is not None)

        num_lands = cur_lands.shape[-1]
        h = torch.zeros(num_lands, 1, s.shape[-2], s.shape[-1])

        # "FH-l", "FH-r", "GSN-l", "GSN-r", "IOF-l", "IOF-r", "MOF-l", "MOF-r",
        # "SPS-l", "SPS-r", "IPS-l", "IPS-r"
        sigma_lut = torch.full([num_lands], 2.5)

        (Y, X) = torch.meshgrid(torch.arange(0, s.shape[-2]), torch.arange(0, s.shape[-1]))
        Y = Y.float()
        X = X.float()

        for land_idx in range(num_lands):
            sigma = sigma_lut[land_idx]
            cur_land = cur_lands[:, land_idx]
            mu_x = cur_land[0]
            mu_y = cur_land[1]
            if not math.isinf(mu_x) and not math.isinf(mu_y):
                pdf = torch.exp(((X - mu_x).pow(2) + (Y - mu_y).pow(2)) /
                                (sigma * sigma * -2)) / (2 * math.pi * sigma * sigma)
                h[land_idx, 0, :, :] = pdf

    return (p, s, cur_lands, h)
def __getitem__(self, index):
    clip_name, label = self.objects[index]
    clip_path = self.clip_name2path_dict[clip_name][0]
    cap = CV2VideoCapture(clip_path)
    trg_people_channel_num = 1 if self.players_in_same_channel else 2
    frames = np.zeros((self.filtered_seq_len, trg_people_channel_num, 128, 256, 3), dtype=np.uint8)
    flow = np.zeros((self.filtered_seq_len, trg_people_channel_num, 128, 256, 2), dtype=np.float32)

    angle, translate, scale, shear = 0.0, 0.0, 1.0, 0.0
    if self.mode == 'train':
        flip = random.choice([0, 1])
        angle, translate, scale, shear = self.get_augmentation_params(
            angle_max=15, translate_max=((-10, 10), (-20, 10)),
            scale_range=(0.75, 1.15), shear_max=10)
        center = (128 * 0.5 + 0.5, 256 * 0.5 + 0.5)
        affine_matrix = np.eye(3)
        # _get_inverse_affine_matrix returns the 6 coefficients of a row-major
        # 2x3 matrix, so fill the first two rows (the original reshaped to
        # (3, 2) into the first two columns, which scrambles the coefficients).
        affine_matrix[:2] = np.array(_get_inverse_affine_matrix(
            center, angle, translate, scale, shear=shear)).reshape(2, 3)

    seqs_to_count = [i for i in range(self.seq_len)
                     if (i >= 0 and i <= 52 and i % self.filtered_seq_step_size == 0)]
    seqs_to_count = seqs_to_count[-self.filtered_seq_len:]  # filter sequence
    seqs_to_count.sort()

    for seq_ind in range(self.seq_len):
        for p in [0, 1]:
            curr_frame_img = cap.read()
            if seq_ind in seqs_to_count:
                if self.mode == 'train':
                    img = Image.fromarray(curr_frame_img)
                    # augmentations
                    img = torchvision.transforms.functional.affine(
                        img, angle=angle, translate=translate, scale=scale,
                        shear=shear, resample=0, fillcolor=0)
                    curr_frame_img = np.array(img)
                frames[seqs_to_count.index(seq_ind), p % trg_people_channel_num] += curr_frame_img

    if self.mode == 'train' and flip:
        label = flip_label(label)
        frames = frames[:, :, :, ::-1, :]

    if self.use_optical_flow:
        for i in range(self.filtered_seq_len - 1):
            for p in range(trg_people_channel_num):
                flow[i, p] = self.calculate_optical_flow(frames[i, p], frames[i + 1, p])

    if self.use_pose_optical_flow:
        pose = self.poses_dict[clip_name]

        def get_y_lim(curr_poses):
            all_y = curr_poses[:, :, :, :, 1]
            all_y_non_zero = all_y[all_y > 0]
            if len(all_y_non_zero) > 0:
                y_max = np.max(all_y_non_zero)
                y_min = np.min(all_y_non_zero)
            else:
                y_max = 0
                y_min = 0
            assert not np.isinf(y_max)
            assert not np.isinf(y_min)
            return y_min, y_max

        y_min, y_max = get_y_lim(pose)
        pose[:, :, :, :, 0] = np.minimum(pose[:, :, :, :, 0] / 5.0, 1279 // 5)
        pose[:, :, :, :, 1] = np.minimum(pose[:, :, :, :, 1] / 2.0, 719 // 2)
        for i in range(self.filtered_seq_len - 1):
            for p in [0, 1]:
                curr_flow = self.calculate_pose_optical_flow(pose[i, p], pose[i + 1, p])
                curr_flow = curr_flow[max(0, int(y_min / 2) - padding):
                                      min(720 // 2, int(y_max / 2) + padding), :, :]
                curr_flow = zoom(curr_flow, np.divide((128, 256, 2), curr_flow.shape), order=0)
                if self.mode == 'train':
                    flow_img = torchvision.transforms.functional.affine(
                        Image.fromarray(draw_hsv(curr_flow)), angle=angle,
                        translate=translate, scale=scale, shear=shear,
                        resample=0, fillcolor=0)
                    # curr_flow = affine_transform(curr_flow, affine_matrix, order=0)
                    flow_arr = np.array(flow_img)
                    curr_flow = np.stack([flow_arr[:, :, 0], flow_arr[:, :, 2]], axis=2)
                flow[i, p % trg_people_channel_num] += curr_flow

    frames = frames.astype(np.float32)
    frames = frames / 255.0
    # [0, 1] => [-1, 1]
    frames = (frames * 2) - 1
    return torch.from_numpy(frames.copy()).unsqueeze(2).transpose(2, -1).squeeze(-1), \
        torch.from_numpy(flow).unsqueeze(2).transpose(2, -1).squeeze(-1), \
        label, clip_name
kwargs = {"fillcolor": fillcolor} if PILLOW_VERSION[0] == '5' else {}
resample = PIL.Image.BILINEAR
img2 = img1_transform.transform((width, height), Image.PERSPECTIVE, coeffs, resample, **kwargs)
tmp_path = os.path.join(root_path, 'projective.png')
img2.save(tmp_path)
listIm.append(tmp_path)

degrees = (-180, 180)
translate = (-0.2, 0.2)
scale = (0.8, 1.2)
shear = (-30, 30)
ret = get_params(degrees, translate, scale, shear, img1_transform.size)
output_size = img1_transform.size
center = (img1_transform.size[0] * 0.5 + 0.5, img1_transform.size[1] * 0.5 + 0.5)

# full affine transformation
matrix = _get_inverse_affine_matrix(center, *ret)
kwargs = {"fillcolor": fillcolor} if PILLOW_VERSION[0] == '5' else {}
img3 = img1_transform.transform(output_size, Image.AFFINE, matrix, resample, **kwargs)
tmp_path = os.path.join(root_path, 'affine.png')
img3.save(tmp_path)
listIm.append(tmp_path)

# similarity transformation (no shear)
matrix = _get_inverse_affine_matrix(center, ret[0], ret[1], ret[2], 0)
kwargs = {"fillcolor": fillcolor} if PILLOW_VERSION[0] == '5' else {}
img4 = img1_transform.transform(output_size, Image.AFFINE, matrix, resample, **kwargs)
tmp_path = os.path.join(root_path, 'similarity.png')
img4.save(tmp_path)
listIm.append(tmp_path)

# Euclidean transformation (no shear, unit scale)
matrix = _get_inverse_affine_matrix(center, ret[0], ret[1], 1.0, 0)
def main(_):
    writer = SummaryWriter(log_dir=opts.tb_log_dir + str(opts.alpha) + '/' + opts.exp_name)
    torch.manual_seed(0)
    if opts.category in ['horse', 'tiger']:
        dataset = tf_final.TigDogDataset_Final(opts.root_dir, opts.category, transforms=None,
                                               normalize=False, max_length=None,
                                               remove_neck_kp=False, split='train',
                                               img_size=opts.img_size, mirror=False,
                                               scale=False, crop=False)
        collate_fn = tf_final.TigDog_collate

    directory = opts.tmp_dir + '/' + opts.category + '/'
    if not osp.exists(directory):
        os.makedirs(directory)

    save_counter = 0
    sample_to_vid = {}
    samples_per_vid = {}
    print('Number of videos for ', opts.category, '-', len(dataset))
    i_sample = 0
    for i_sample, sample in enumerate(dataset):
        num_frames = sample['video'].shape[0]
        for i in range(num_frames):
            new_sample = {}
            for k in sample.keys():
                if k in ['video', 'sfm_poses', 'landmarks', 'segmentations', 'bboxes']:
                    new_sample[k] = sample[k][i]
            pkl.dump(new_sample, open(directory + str(save_counter) + '.pkl', 'wb'))
            sample_to_vid[save_counter] = i_sample
            if i_sample in samples_per_vid:
                samples_per_vid[i_sample].append(save_counter)
            else:
                samples_per_vid[i_sample] = [save_counter]
            save_counter += 1
            # if i >= 5:  # TODO: fix this
            #     break
        # if i_sample >= 3:  # TODO: fix this
        #     break

    training_samples = save_counter
    print('Training samples (frames):', training_samples)

    dataset = tigdog_mf.TigDogDataset_MultiFrame(opts.tmp_dir, opts.category,
                                                 num_frames=opts.num_frames,
                                                 sample_to_vid=sample_to_vid,
                                                 samples_per_vid=samples_per_vid,
                                                 normalize=True, transforms=True,
                                                 remove_neck_kp=True, split='train',
                                                 img_size=opts.img_size, mirror=True,
                                                 scale=True, crop=True, v2_crop=True,
                                                 tight_bboxes=True)
    collate_fn = tigdog_mf.TigDog_collate
    dataloader = DataLoader(dataset, opts.batch_size, drop_last=True, shuffle=True,
                            collate_fn=collate_fn, num_workers=2)
    print('Dataloader:', len(dataloader))

    IMM_Model = IMM(dim=opts.num_kps, heatmap_std=opts.std, in_channel=3, h_channel=32).cuda()
    loss_fn_vgg = lpips.LPIPS(net='vgg').cuda()
    loss_mse = torch.nn.MSELoss()
    optimizer = optim.Adam(IMM_Model.parameters(), lr=opts.lr)
    lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1,
                                                              patience=0, threshold=1e-5)
    n_iter = 0
    n_batch = 0
    for epoch in range(opts.epochs):
        avg_loss = 0
        for sample in dataloader:
            input_img_tensor = sample['img'].type(torch.FloatTensor).clone().cuda()
            mask_3channels = torch.unsqueeze(sample['mask'], 2)
            mask_3channels = mask_3channels.repeat(1, 1, 3, 1, 1).clone().cuda()
            input_img_tensor *= mask_3channels
            frame1 = input_img_tensor[:, 0]
            frame2 = input_img_tensor[:, 1]
            source = frame1
            target = frame2
            target_mask = mask_3channels[:, 1].cpu()
            # mask_edt = np.stack([compute_dt(m) for m in target_mask])
            total_loss_affine = 0

            # reconstruct image, get result_kps
            reconstruct, result_kps, gauss, pose_val = IMM_Model(source, target)
            reconstruct_display = torch.clamp(reconstruct, 0, 1)
            result_kps_vis = torch.cat([result_kps, torch.ones_like(result_kps[:, :, :1])], dim=-1)
            # edts_barrier = torch.tensor(mask_edt).float().unsqueeze(1).cuda()
            # loss_mask = texture_dt_loss_v(result_kps, edts_barrier)
            loss_reconstruction = loss_fn_vgg.forward(reconstruct, target).mean()

            for i in range(4):
                # transform target to target_affine
                rand_angle = np.random.uniform(0, 50)
                rand_shear = np.random.uniform(0, 50)
                target_affine = affine(target, rand_angle, [0.0, 0.0], 1.0, [0.0, rand_shear])
                # keep track of the matrix used for the affine warp; we need it
                # to transform the kps
                matrix = _get_inverse_affine_matrix([0.0, 0.0], rand_angle, [0.0, 0.0],
                                                    1.0, [0.0, rand_shear])
                transformation_matrix = torch.tensor([[matrix[0], matrix[1], matrix[2]],
                                                      [matrix[3], matrix[4], matrix[5]],
                                                      [0, 0, 1]]).cuda()
                # get predicted keypoints of the affine image
                _, affine_kps, _, _ = IMM_Model(source, target_affine)
                # the true affine keypoints are matrix @ predicted kps in the original img
                true_affine_kps = torch.zeros(opts.batch_size, opts.num_kps, 2).cuda()
                for batch in range(opts.batch_size):
                    for n in range(opts.num_kps):
                        result_xyz = torch.tensor([result_kps[batch, n, 0],
                                                   result_kps[batch, n, 1], 1]).cuda()
                        t = torch.matmul(torch.inverse(transformation_matrix), result_xyz)
                        true_affine_kps[batch, n] = t[:2]
                true_affine_kps_vis = torch.stack([true_affine_kps[:, :, 0],
                                                   true_affine_kps[:, :, 1],
                                                   torch.ones_like(true_affine_kps[:, :, 1])], dim=-1)
                pred_affine_kps_vis = torch.stack([affine_kps[:, :, 0],
                                                   affine_kps[:, :, 1],
                                                   torch.ones_like(affine_kps[:, :, 1])], dim=-1)
                loss_affine = loss_mse(affine_kps, true_affine_kps)
                total_loss_affine += loss_affine

                if n_batch % opts.vis_every == 0:
                    kp_img = utils.kp2im(result_kps_vis[0].detach().cpu().numpy(),
                                         target[0].cpu().numpy(), radius=2) / 255
                    kp_img = torch.from_numpy(kp_img).permute(2, 0, 1)[None]
                    kp_img = kp_img.to(source.device)
                    kp_affine = utils.kp2im(true_affine_kps_vis[0].detach().cpu().numpy(),
                                            target_affine[0].cpu().numpy(), radius=2) / 255
                    kp_affine = torch.from_numpy(kp_affine).permute(2, 0, 1)[None]
                    kp_affine = kp_affine.to(source.device)
                    kp_affine_p = utils.kp2im(pred_affine_kps_vis[0].detach().cpu().numpy(),
                                              target_affine[0].cpu().numpy(), radius=2) / 255
                    kp_affine_p = torch.from_numpy(kp_affine_p).permute(2, 0, 1)[None]
                    kp_affine_p = kp_affine_p.to(source.device)
                    kp_mask = utils.kp2im(result_kps_vis[0].detach().cpu().numpy(),
                                          mask_3channels[0, 1].cpu().numpy(), radius=2) / 255
                    kp_mask = torch.from_numpy(kp_mask).permute(2, 0, 1)[None]
                    kp_mask = kp_mask.to(source.device)
                    grid = torch.cat([source[:1], target[:1], kp_img[:1], kp_affine[:1],
                                      kp_affine_p[:1], kp_mask, reconstruct_display[:1]], dim=3)[0]
                    writer.add_image('iter {n} of image {i} (reconstruction, affine) = ({r},{a}) '
                                     .format(r=loss_reconstruction, a=loss_affine,
                                             i=i, n=str(n_iter)),
                                     grid, n_iter)

            n_batch += 1
            if epoch == 20:
                # reset the learning rate scheduler
                optimizer.param_groups[0]['lr'] = opts.lr
                lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                    optimizer, factor=0.1, patience=0, threshold=1e-5)
            if epoch < 20:
                alpha = 0.0
            else:
                alpha = opts.alpha
            loss = loss_reconstruction + (alpha * total_loss_affine)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            avg_loss += loss.item()
            writer.add_scalar('Loss : ', loss, n_iter)
            writer.add_scalar('Reconstruction : ', loss_reconstruction, n_iter)
            writer.add_scalar('Affine : ', total_loss_affine, n_iter)
            n_iter += 1

        avg_loss = avg_loss / len(dataloader)
        lr_scheduler.step(avg_loss)
        print('Epoch ', epoch, ' average loss ', avg_loss, ' learning rate ',
              optimizer.param_groups[0]['lr'])