Example 1
        def op_script(data: torch.Tensor, mean: torch.Tensor,
                      std: torch.Tensor) -> torch.Tensor:
            return kornia.normalize(data, mean, std)

        data = torch.ones(2, 3, 1, 1).to(device)
        data += 2

        mean = torch.tensor([0.5, 1.0, 2.0]).repeat(2, 1).to(device)
        std = torch.tensor([2.0, 2.0, 2.0]).repeat(2, 1).to(device)

        actual = op_script(data, mean, std)
        expected = kornia.normalize(data, mean, std)
        assert_allclose(actual, expected)
Example 2
 def forward(self, input: torch.Tensor) -> List[torch.Tensor]:
     '''
     Forward pass of the model.
     :param input: (torch.Tensor) Input tensor of shape (batch size, channels, height, width)
     :return: (List[torch.Tensor]) List of intermediate features in ascending order w.r.t. the VGG layer number
     '''
     # Convert grayscale input to RGB if needed
     if input.shape[1] == 1:
         output = input.repeat_interleave(3, dim=1)
     else:
         output = input
     # Normalize input
     output = kornia.normalize(output,
                               mean=torch.tensor([0.485, 0.456, 0.406], device=output.device),
                               std=torch.tensor([0.229, 0.224, 0.225], device=output.device))
     # Init list for features
     features = []
     # Feature path
     for layer in self.vgg16.features:
         output = layer(output)
         if isinstance(layer, nn.MaxPool2d):
             features.append(output)
     # Average pool operation
     output = self.vgg16.avgpool(output)
     # Flatten tensor
     output = output.flatten(start_dim=1)
     # Classification path
     for index, layer in enumerate(self.vgg16.classifier):
         output = layer(output)
         if index == 3 or index == 6:
             features.append(output)
     if self.return_output:
         return output
     return features
Example 3
    def test_single_value(self, device):
        # prepare input data
        mean = torch.tensor(2).to(device)
        std = torch.tensor(3).to(device)
        data = torch.ones(2, 3, 256, 313).to(device)

        # expected output
        expected = (data - mean) / std

        assert_allclose(kornia.normalize(data, mean, std), expected)
Example 4
    def _get_transformed_images(images, hflip):

        images_transformed = images

        if hflip:
            images_transformed = K.hflip(images_transformed)

        # Normalize
        images_transformed = K.normalize(images_transformed, 0.5, 0.5)

        return images_transformed
Example 5
    def _get_transformed_frames(frames, hflip):

        frames_transformed = frames

        if hflip:
            frames_transformed = K.hflip(frames_transformed)

        # Normalize
        frames_transformed = K.normalize(frames_transformed, 0.5, 0.5)

        # Permute to (C, T, H, W)
        frames_transformed = frames_transformed.permute(1, 0, 2, 3)

        return frames_transformed
Example 6
    def closure():
        nonlocal global_step
        global_step += 1

        if torch.is_grad_enabled():
            optimizer.zero_grad()

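        # Render images from the FLAME model, convert them to grayscale and
        # standardize them with their own mean/std before the heatmap estimator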
        _, _, model_images = flame()
        model_grays = rgb_to_grayscale(model_images)
        model_grays = normalize(model_grays, model_grays.mean(),
                                model_grays.std())
        model_heatmaps = estimator(model_grays)[-1]

        # loss_landmarks = 50 * criterion_landmarks(get_target_landmarks(target_landmarks), original_landmarks)
        # loss = loss_landmarks
        # log('loss_landmarks', loss_landmarks.item(), global_step)

        loss_simple = criterion_simple(model_heatmaps, heatmaps)
        loss = loss_simple
        log('loss_simple', loss_simple.item(), global_step)

        model_heatmaps_gray = get_heatmap_gray(model_heatmaps)
        if criterion_gp is not None:
            loss_gp = arg.loss_gp_lambda * criterion_gp(
                model_heatmaps_gray, heatmaps_gray) + 100
            loss = loss + loss_gp
            log('loss_gp', loss_gp.item(), global_step)

        log('loss', loss.item(), global_step)

        if loss.requires_grad:
            loss.backward(retain_graph=True)

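        # Log the grayscale heatmaps and the rendered images as uint8 previews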
        log_img(
            'original',
            derescale_0_1(heatmaps_gray[0].unsqueeze(0), 0,
                          255).detach().to(dtype=torch.uint8), global_step)
        log_img(
            'rendered',
            derescale_0_1(model_images, 0,
                          255)[0].detach().to(dtype=torch.uint8), global_step)
        log_img(
            'target',
            derescale_0_1(model_heatmaps_gray[0].unsqueeze(0), 0,
                          255).detach().to(dtype=torch.uint8), global_step)

        return loss
Example 7
    def preprocess(self, images):
        """
        Preprocess images
        Args:
            images: (N, 3, H, W), Input images
        Returns:
            x: (N, 3, H, W), Preprocessed images
        """
        x = images
        if self.pretrained:
            # Create a mask for padded pixels
            mask = torch.isnan(x)

            # Match ResNet pretrained preprocessing
            x = kornia.normalize(x, mean=self.norm_mean, std=self.norm_std)

            # Make padded pixels = 0
            x[mask] = 0

        return x
Example 8
    def __getitem__(self, item):
        dataset_route, dataset, split, type, annotation, crop_size, RGB, sigma, trans_ratio, rotate_limit,\
        scale_ratio_up, scale_ratio_down, scale_horizontal, scale_vertical =\
            self.arg.dataset_route, self.dataset, self.split, self.type,\
            self.list[item], self.arg.crop_size, self.arg.RGB, self.arg.sigma,\
            self.arg.trans_ratio, self.arg.rotate_limit, self.arg.scale_ratio_up, self.arg.scale_ratio_down,\
            self.arg.scale_horizontal, self.arg.scale_vertical

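        # Load the image and parse the landmark coordinates and bounding box from the annotation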
        pic_orig = cv2.imread(dataset_route[dataset] + annotation[-1])
        coord_x = list(map(float, annotation[:2 * kp_num[dataset]:2]))
        coord_y = list(map(float, annotation[1:2 * kp_num[dataset]:2]))
        bbox = np.array(list(map(int, annotation[-7:-3])))

        translation, trans_dir, rotation, scaling, scaling_horizontal, scaling_vertical, flip, gaussian_blur = get_random_transform_param(
            type,
            bbox,
            trans_ratio,
            rotate_limit,
            scale_ratio_up,
            scale_ratio_down,
            scale_horizontal,
            scale_vertical,
            flip=False,
            gaussian=False)

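        # Rescale the bounding box with the random scale factors (uniform first, then per-axis)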
        horizontal_add = (bbox[2] - bbox[0]) * (1 - scaling)
        vertical_add = (bbox[3] - bbox[1]) * (1 - scaling)
        bbox = np.float32([
            bbox[0] - horizontal_add, bbox[1] - vertical_add,
            bbox[2] + horizontal_add, bbox[3] + vertical_add
        ])

        horizontal_add = (bbox[2] - bbox[0]) * scaling_horizontal
        vertical_add = (bbox[3] - bbox[1]) * scaling_vertical
        bbox = np.float32([
            bbox[0] - horizontal_add, bbox[1] - vertical_add,
            bbox[2] + horizontal_add, bbox[3] + vertical_add
        ])

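        # Build the crop affine transform: three corners of the translated box map onto the crop corners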
        position_before = np.float32(
            [[
                int(bbox[0]) + pow(-1, trans_dir + 1) * translation,
                int(bbox[1]) + pow(-1, trans_dir // 2 + 1) * translation
            ],
             [
                 int(bbox[0]) + pow(-1, trans_dir + 1) * translation,
                 int(bbox[3]) + pow(-1, trans_dir // 2 + 1) * translation
             ],
             [
                 int(bbox[2]) + pow(-1, trans_dir + 1) * translation,
                 int(bbox[3]) + pow(-1, trans_dir // 2 + 1) * translation
             ]])
        position_after = np.float32([[0, 0], [0, crop_size - 1],
                                     [crop_size - 1, crop_size - 1]])
        crop_matrix = cv2.getAffineTransform(position_before, position_after)
        # crop_matrix = np.vstack([crop_matrix, [0, 0, 1]])
        pic_affine_orig = cv2.warpAffine(pic_orig,
                                         crop_matrix, (crop_size, crop_size),
                                         borderMode=cv2.BORDER_REPLICATE)
        # width_height = (bbox[2] - bbox[0], bbox[3] - bbox[1])
        width_height = (crop_size, crop_size)
        affine_matrix = get_affine_matrix(width_height, rotation, scaling)
        # affine_matrix = np.vstack([affine_matrix, [0, 0, 1]])
        # affine_matrix = np.matmul(crop_matrix, affine_matrix)
        # TODO one transform
        pic_affine_orig = cv2.warpAffine(pic_affine_orig,
                                         affine_matrix, (crop_size, crop_size),
                                         borderMode=cv2.BORDER_REPLICATE)
        pic_affine_orig = further_transform(
            pic_affine_orig, bbox, flip,
            gaussian_blur) if type in ['train'] else pic_affine_orig

        # show_img(pic_affine_orig, wait=0, keep=False)
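        # Convert the cropped BGR image to an RGB tensor and normalize with the dataset color statistics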
        pic_affine_orig = bgr_to_rgb(image_to_tensor(pic_affine_orig))

        pic_affine_orig_norm = normalize(pic_affine_orig,
                                         torch.from_numpy(self.mean_color),
                                         torch.from_numpy(self.std_color))
        if not RGB:
            pic_affine = convert_img_to_gray(pic_affine_orig)
            pic_affine = normalize(pic_affine, self.mean_gray, self.std_gray)
        else:
            pic_affine = pic_affine_orig_norm

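        # Map the ground-truth landmark coordinates through the same crop and affine transforms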
        coord_x_cropped, coord_y_cropped = get_cropped_coords(dataset,
                                                              crop_matrix,
                                                              coord_x,
                                                              coord_y,
                                                              crop_size,
                                                              flip=flip)
        gt_coords_xy = get_gt_coords(dataset, affine_matrix, coord_x_cropped,
                                     coord_y_cropped)

        return pic_affine, pic_affine_orig_norm, gt_coords_xy