    def forward(self, source_image, kp_driving, kp_source):
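        """
        Predict a dense sampling grid for the source image: a softmax mask
        mixes the per-keypoint difference embeddings into relative offsets,
        an optional correction field is added, and the result is returned as
        an absolute (bs, d, h, w, 3) grid with a zero z-coordinate.
        """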
        if self.scale_factor != 1:
            source_image = F.interpolate(source_image, scale_factor=(1, self.scale_factor, self.scale_factor))

        prediction = self.mask_embedding(source_image, kp_driving, kp_source)
        for block in self.group_blocks:
            prediction = block(prediction)
            prediction = F.leaky_relu(prediction, 0.2)
        prediction = self.hourglass(prediction)

        bs, _, d, h, w = prediction.shape
        if self.use_mask:
            mask = prediction[:, :(self.num_kp + 1)]
            mask = F.softmax(mask, dim=1)
            mask = mask.unsqueeze(2)
            difference_embedding = self.difference_embedding(source_image, kp_driving, kp_source)
            difference_embedding = difference_embedding.view(bs, self.num_kp + 1, 2, d, h, w)
            deformations_relative = (difference_embedding * mask).sum(dim=1)
        else:
            deformations_relative = 0

        if self.use_correction:
            correction = prediction[:, -2:]
        else:
            correction = 0

        deformations_relative = deformations_relative + correction
        deformations_relative = deformations_relative.permute(0, 2, 3, 4, 1)

        coordinate_grid = make_coordinate_grid((h, w), type=deformations_relative.type())
        coordinate_grid = coordinate_grid.view(1, 1, h, w, 2)
        deformation = deformations_relative + coordinate_grid
        z_coordinate = torch.zeros(deformation.shape[:-1] + (1,)).type(deformation.type())

        return torch.cat([deformation, z_coordinate], dim=-1)
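A hedged sketch of how a grid shaped like the return value above is typically consumed (the tensor names and shapes here are assumptions, not from this listing): with a depth-1 source volume, 5-D grid_sample produces d warped frames, and the zero z-channel always samples that single slice.

import torch
import torch.nn.functional as F

deformation = torch.zeros(2, 4, 64, 64, 3)  # stand-in for forward()'s (bs, d, h, w, 3) output
source = torch.randn(2, 3, 1, 64, 64)       # (bs, c, depth=1, h, w)
warped = F.grid_sample(source, deformation, align_corners=True)  # -> (2, 3, 4, 64, 64)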
    def __init__(self, bs, **kwargs):
        # noise = np.random.normal(loc=0, scale=kwargs['sigma_affine'], size=(bs, 2, 3))
        noise = paddle.distribution.Normal(loc=[0],
                                           scale=[kwargs['sigma_affine']
                                                  ]).sample([bs, 2, 3])
        noise = noise.reshape((bs, 2, 3))
        if TEST_MODE:
            noise = paddle.to_tensor(np.ones((bs, 2, 3)).astype(np.float32))

        self.theta = noise + paddle.tensor.eye(2, 3, dtype='float32').reshape(
            (1, 2, 3))
        self.bs = bs

        if ('sigma_tps' in kwargs) and ('points_tps' in kwargs):
            self.tps = True
            self.control_points = make_coordinate_grid(
                (kwargs['points_tps'], kwargs['points_tps'])).unsqueeze(0)
            if TEST_MODE:
                self.control_params = paddle.to_tensor(
                    np.ones(
                        (bs, 1, kwargs['points_tps']**2)).astype(np.float32))
            else:
                buf = paddle.distribution.Normal(
                    loc=[0], scale=[kwargs['sigma_tps']
                                    ]).sample([bs, 1, kwargs['points_tps']**2])
                self.control_params = buf.reshape(
                    (bs, 1, kwargs['points_tps']**2))
        else:
            self.tps = False
Example #3
    def transform_frame(self, frame):
        grid = make_coordinate_grid(frame.shape[2:],
                                    type=frame.type()).unsqueeze(0)
        grid = grid.view(1, frame.shape[2] * frame.shape[3], 2)
        grid = self.warp_coordinates(grid).view(self.bs, frame.shape[2],
                                                frame.shape[3], 2)
        return F.grid_sample(frame, grid, padding_mode="reflection")
    def create_sparse_motions(self, source_image, kp_driving, kp_source):
        """
        Eq 4. in the paper T_{s<-d}(z)
        """
        bs, _, h, w = source_image.shape
        identity_grid = make_coordinate_grid((h, w))
        identity_grid = identity_grid.reshape((1, 1, h, w, 2))
        coordinate_grid = identity_grid - kp_driving['value'].reshape(
            (bs, self.num_kp, 1, 1, 2))
        if 'jacobian' in kp_driving:
            jacobian = paddle.matmul(kp_source['jacobian'],
                                     paddle.inverse(kp_driving['jacobian']))
            dim_1, dim_2, *else_dim = jacobian.shape
            jacobian = jacobian.reshape((-1, *else_dim))
            jacobian = jacobian.unsqueeze(-3).unsqueeze(-3)
            jacobian = jacobian.tile((1, h, w, 1, 1))

            _, _, *dimm = coordinate_grid.shape
            coordinate_grid = coordinate_grid.reshape((-1, *dimm))
            coordinate_grid = paddle.matmul(jacobian,
                                            coordinate_grid.unsqueeze(-1))
            coordinate_grid = coordinate_grid.squeeze(-1)
            coordinate_grid = coordinate_grid.reshape(
                (dim_1, dim_2, *(coordinate_grid.shape[1:])))

        driving_to_source = coordinate_grid + kp_source['value'].reshape(
            (bs, self.num_kp, 1, 1, 2))

        # adding background feature
        identity_grid = identity_grid.tile((bs, 1, 1, 1, 1))
        sparse_motions = paddle.concat([identity_grid, driving_to_source],
                                       axis=1)
        return sparse_motions
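For reference, the equation the create_sparse_motions variants in this listing implement (Eq. 4 of the First Order Motion Model paper, in the paper's notation):

T_{S \leftarrow D}(z) \approx T_{S \leftarrow R}(p_k) + J_k \left(z - T_{D \leftarrow R}(p_k)\right), \qquad
J_k = \left(\tfrac{d}{dp} T_{S \leftarrow R}(p)\big|_{p = p_k}\right) \left(\tfrac{d}{dp} T_{D \leftarrow R}(p)\big|_{p = p_k}\right)^{-1}

The code mirrors this term by term: coordinate_grid holds z - T_{D<-R}(p_k), jacobian is J_k (the source Jacobian times the inverted driving Jacobian), and kp_source['value'] supplies T_{S<-R}(p_k).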
def kp2gaussian(kp, spatial_size, kp_variance='matrix'):
    """
    Transform a keypoint into gaussian like representation
    """
    mean = kp['mean']

    coordinate_grid = make_coordinate_grid(spatial_size, mean.type())

    number_of_leading_dimensions = len(mean.shape) - 1
    shape = (1, ) * number_of_leading_dimensions + coordinate_grid.shape

    coordinate_grid = coordinate_grid.view(*shape)
    repeats = mean.shape[:number_of_leading_dimensions] + (1, 1, 1)
    coordinate_grid = coordinate_grid.repeat(*repeats)

    # Preprocess kp shape
    shape = mean.shape[:number_of_leading_dimensions] + (1, 1, 2)
    mean = mean.view(*shape)

    mean_sub = (coordinate_grid - mean)
    if kp_variance == 'matrix':
        var = kp['var']
        inv_var = matrix_inverse(var)
        shape = inv_var.shape[:number_of_leading_dimensions] + (1, 1, 2, 2)
        inv_var = inv_var.view(*shape)
        under_exp = torch.matmul(torch.matmul(mean_sub.unsqueeze(-2), inv_var),
                                 mean_sub.unsqueeze(-1))
        under_exp = under_exp.squeeze(-1).squeeze(-1)
        out = torch.exp(-0.5 * under_exp)
    elif kp_variance == 'single':
        out = torch.exp(-0.5 * (mean_sub**2).sum(-1) / kp['var'])
    else:
        out = torch.exp(-0.5 * (mean_sub**2).sum(-1) / kp_variance)

    return out
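A minimal usage sketch for the version above (shapes inferred from the indexing: kp['mean'] holds normalized (x, y) keypoints with any number of leading dimensions; assumes the make_coordinate_grid helper sketched at the end of this listing):

import torch

kp = {'mean': torch.zeros(2, 10, 2)}  # (bs, num_kp, 2), all points at the grid centre
heatmaps = kp2gaussian(kp, spatial_size=(58, 58), kp_variance=0.01)  # scalar-variance branch
print(heatmaps.shape)  # torch.Size([2, 10, 58, 58])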
Example #6
    def transform_frame(self, frame):
        grid = fluid.layers.unsqueeze(
            make_coordinate_grid(frame.shape[2:], 'float32'), [0])
        grid = fluid.layers.reshape(grid,
                                    (1, frame.shape[2] * frame.shape[3], 2))
        grid = fluid.layers.reshape(
            self.warp_coordinates(grid),
            (self.bs, frame.shape[2], frame.shape[3], 2))
        if TEST_MODE:
            bf = fluid.layers.grid_sampler(frame, grid)
            logging.warning(
                'TEST MODE Output of fluid.layers.grid_sampler == 2. model:L152'
            )
            return fluid.dygraph.to_variable(
                np.ones(bf.shape).astype(np.float32) * 2)
        # 0.0.0c branch: waiting to be updated
        elif PP_v2:
            # return fluid.layers.grid_sampler(frame, grid)
            return fluid.layers.grid_sampler(frame,
                                             grid,
                                             mode='bilinear',
                                             padding_mode='reflect',
                                             align_corners=False)
        else:
            return fluid.layers.grid_sampler(frame, grid)
Example #7
    def create_sparse_motions(self, source_image, kp_driving, kp_source):
        """
        Eq 4. in the paper T_{s<-d}(z)
        """
        bs, _, h, w = source_image.shape
        identity_grid = make_coordinate_grid((h, w),
                                             type=kp_source['value'].type())
        identity_grid = identity_grid.view(1, 1, h, w, 2)
        coordinate_grid = identity_grid - kp_driving['value'].view(
            bs, self.num_kp, 1, 1, 2)
        if 'jacobian' in kp_driving:
            jacobian = torch.matmul(kp_source['jacobian'],
                                    torch.inverse(kp_driving['jacobian']))
            jacobian = jacobian.unsqueeze(-3).unsqueeze(-3)
            jacobian = jacobian.repeat(1, 1, h, w, 1, 1)
            coordinate_grid = torch.matmul(jacobian,
                                           coordinate_grid.unsqueeze(-1))
            coordinate_grid = coordinate_grid.squeeze(-1)

        driving_to_source = coordinate_grid + kp_source['value'].view(
            bs, self.num_kp, 1, 1, 2)

        # adding background feature
        identity_grid = identity_grid.repeat(bs, 1, 1, 1, 1)
        sparse_motions = torch.cat([identity_grid, driving_to_source], dim=1)
        return sparse_motions
    def forward(self, appearance_frame, kp_video, kp_appearance):
        bs, _, _, h, w = appearance_frame.shape
        _, d, num_kp, _ = kp_video['mean'].shape
        coordinate_grid = make_coordinate_grid((h, w), type=appearance_frame.type())
        coordinate_grid = coordinate_grid.view(1, 1, h, w, 2).repeat(bs, d, 1, 1, 1)

        z_coordinate = torch.zeros(coordinate_grid.shape[:-1] + (1,)).type(coordinate_grid.type())
        return torch.cat([coordinate_grid, z_coordinate], dim=-1)
    def gaussian2kp(self, heatmap):

        shape = heatmap.shape
        heatmap = heatmap.unsqueeze(-1)
        grid = make_coordinate_grid(shape[2:],
                                    heatmap.type()).unsqueeze_(0).unsqueeze_(0)
        value = (heatmap * grid).sum(dim=(2, 3))
        kp = {'value': value}

        return kp
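Every gaussian2kp variant in this listing is a soft-argmax: assuming each heatmap channel is normalized to sum to one over the grid \mathcal{G}, the keypoint is the heatmap-weighted mean of the coordinates,

p_k = \sum_{z \in \mathcal{G}} H_k(z) \, z

which is exactly the (heatmap * grid).sum(dim=(2, 3)) reduction above.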
    def transform_frame(self, frame):
        grid = make_coordinate_grid(frame.shape[2:], 'float32').unsqueeze(0)
        grid = grid.reshape((1, frame.shape[2] * frame.shape[3], 2))
        grid = self.warp_coordinates(grid).reshape(
            (self.bs, frame.shape[2], frame.shape[3], 2))
        return F.grid_sample(frame,
                             grid,
                             mode='bilinear',
                             padding_mode='reflection',
                             align_corners=True)
Example #11
    def gaussian2kp(self, heatmap):
        """
        Extract the mean from a heatmap
        """
        shape = heatmap.shape
        heatmap = heatmap.unsqueeze(-1)
        grid = make_coordinate_grid(shape[2:], heatmap.type()).unsqueeze_(0).unsqueeze_(0)
        result = (heatmap * grid).sum(dim=(2, 3))

        return result
    def gaussian2kp(self, heatmap):
        """
        Extract the mean from a heatmap
        """
        shape = heatmap.shape
        heatmap = heatmap.unsqueeze(-1)
        grid = make_coordinate_grid(shape[2:], heatmap.type())
        grid = grid.unsqueeze(0)
        grid = grid.unsqueeze(0)
        value = (heatmap * grid).sum(dim=(2, 3))

        return value
Example #13
    def __init__(self, bs, **kwargs):
        noise = torch.normal(mean=0, std=kwargs['sigma_affine'] * torch.ones([bs, 2, 3]))
        self.theta = noise + torch.eye(2, 3).view(1, 2, 3)
        self.bs = bs

        if ('sigma_tps' in kwargs) and ('points_tps' in kwargs):
            self.tps = True
            self.control_points = make_coordinate_grid((kwargs['points_tps'], kwargs['points_tps']), type=noise.type())
            self.control_points = self.control_points.unsqueeze(0)
            self.control_params = torch.normal(mean=0,
                                               std=kwargs['sigma_tps'] * torch.ones([bs, 1, kwargs['points_tps'] ** 2]))
        else:
            self.tps = False
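The transform_frame methods in this listing all call a warp_coordinates that is never shown. A sketch for the PyTorch variant of the class, following the FOMM reference implementation (treat the details as assumptions rather than this repository's exact code): the affine part applies self.theta, and the TPS part adds an r^2 log r radial-basis term at the control points.

import torch

class Transform:  # name assumed; attributes as set in the __init__ of Example #13
    def warp_coordinates(self, coordinates):
        # Affine part: batched 2x2 linear map plus translation column.
        theta = self.theta.type(coordinates.type()).unsqueeze(1)
        transformed = torch.matmul(theta[:, :, :, :2],
                                   coordinates.unsqueeze(-1)) + theta[:, :, :, 2:]
        transformed = transformed.squeeze(-1)
        if self.tps:
            control_points = self.control_points.type(coordinates.type())
            control_params = self.control_params.type(coordinates.type())
            distances = coordinates.view(coordinates.shape[0], -1, 1, 2) \
                - control_points.view(1, 1, -1, 2)
            distances = torch.abs(distances).sum(-1)
            # Thin-plate-spline kernel r^2 * log(r), weighted per control point.
            result = distances ** 2 * torch.log(distances + 1e-6)
            result = (result * control_params).sum(dim=2).view(
                self.bs, coordinates.shape[1], 1)
            transformed = transformed + result
        return transformed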
Example #14
    def gaussian2kp(self, heatmap):
        """
        Extract the mean from a heatmap
        """
        shape = heatmap.shape  # B, 10, 58, 58
        heatmap = heatmap.unsqueeze(-1)  # B, 10, 58, 58, 1
        grid = make_coordinate_grid(shape[2:],
                                    heatmap.type()).unsqueeze_(0).unsqueeze_(
                                        0)  # 1, 1, 58, 58, 2
        value = (heatmap * grid).sum(dim=(2, 3))  # B, 10, 2
        kp = {'value': value}

        return kp
Example #15
    def region2affine(self, region):
        shape = region.shape
        region = region.unsqueeze(-1)
        grid = make_coordinate_grid(shape[2:], region.type()).unsqueeze_(0).unsqueeze_(0)
        mean = (region * grid).sum(dim=(2, 3))

        region_params = {'shift': mean}

        if self.pca_based:
            mean_sub = grid - mean.unsqueeze(-2).unsqueeze(-2)
            covar = torch.matmul(mean_sub.unsqueeze(-1), mean_sub.unsqueeze(-2))
            covar = covar * region.unsqueeze(-1)
            covar = covar.sum(dim=(2, 3))
            region_params['covar'] = covar

        return region_params
    def create_sparse_motions(self, source_image, kp_driving_value, kp_driving_jacobian, kp_source_value, kp_source_jacobian):
        """
        Eq 4. in the paper T_{s<-d}(z)
        """
        bs, _, h, w = source_image.shape
        identity_grid = make_coordinate_grid((h, w), type=kp_source_value.type())
        identity_grid = identity_grid.view(1, 1, h, w, 2)
        coordinate_grid = identity_grid - kp_driving_value.view(bs, self.num_kp, 1, 1, 2)
        if kp_driving_jacobian is not None:
            # TODO: Replace torch.inverse not implemented in coreml
            jacobian = torch.matmul(kp_source_jacobian, torch.inverse(kp_driving_jacobian))
            jacobian = jacobian.unsqueeze(-3).unsqueeze(-3)
            jacobian = jacobian.repeat(1, 1, h, w, 1, 1)
            coordinate_grid = torch.matmul(jacobian, coordinate_grid.unsqueeze(-1))
            coordinate_grid = coordinate_grid.squeeze(-1)

        driving_to_source = coordinate_grid + kp_source_value.view(bs, self.num_kp, 1, 1, 2)

        # adding background feature
        identity_grid = identity_grid.repeat(bs, 1, 1, 1, 1)
        sparse_motions = torch.cat([identity_grid, driving_to_source], dim=1)
        return sparse_motions
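One way to resolve the TODO above: the keypoint Jacobians are batched 2x2 matrices, so torch.inverse can be replaced with the closed-form adjugate formula, which export-constrained backends handle. A hedged sketch (the helper name is ours):

import torch

def inverse_2x2(m):
    # inv([[a, b], [c, d]]) = [[d, -b], [-c, a]] / (ad - bc), batched over
    # all leading dimensions of an (..., 2, 2) tensor.
    a, b = m[..., 0, 0], m[..., 0, 1]
    c, d = m[..., 1, 0], m[..., 1, 1]
    det = (a * d - b * c).unsqueeze(-1).unsqueeze(-1)
    adj = torch.stack([torch.stack([d, -b], dim=-1),
                       torch.stack([-c, a], dim=-1)], dim=-2)
    return adj / det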
Example #17
def kp2gaussian(kp, spatial_size, kp_variance):
    """
    Transform a keypoint into gaussian like representation
    """
    mean = kp['value']

    coordinate_grid = make_coordinate_grid(spatial_size, mean.type())
    number_of_leading_dimensions = len(mean.shape) - 1
    shape = (1, ) * number_of_leading_dimensions + coordinate_grid.shape
    coordinate_grid = coordinate_grid.view(*shape)
    repeats = mean.shape[:number_of_leading_dimensions] + (1, 1, 1)
    coordinate_grid = coordinate_grid.repeat(*repeats)

    # Preprocess kp shape
    shape = mean.shape[:number_of_leading_dimensions] + (1, 1, 2)
    mean = mean.view(*shape)

    mean_sub = (coordinate_grid - mean)

    out = torch.exp(-0.5 * (mean_sub**2).sum(-1) / kp_variance)

    return out
Example #18
    def create_sparse_motions(self,
                              source_image,
                              driving_region_params,
                              source_region_params,
                              bg_params=None):
        bs, _, h, w = source_image.shape
        identity_grid = make_coordinate_grid(
            (h, w), type=source_region_params['shift'].type())
        identity_grid = identity_grid.view(1, 1, h, w, 2)
        coordinate_grid = identity_grid - driving_region_params['shift'].view(
            bs, self.num_regions, 1, 1, 2)
        if 'affine' in driving_region_params:
            affine = torch.matmul(
                source_region_params['affine'],
                torch.inverse(driving_region_params['affine']))
            if self.revert_axis_swap:
                affine = affine * torch.sign(affine[:, :, 0:1, 0:1])
            affine = affine.unsqueeze(-3).unsqueeze(-3)
            affine = affine.repeat(1, 1, h, w, 1, 1)
            coordinate_grid = torch.matmul(affine,
                                           coordinate_grid.unsqueeze(-1))
            coordinate_grid = coordinate_grid.squeeze(-1)

        driving_to_source = coordinate_grid + source_region_params[
            'shift'].view(bs, self.num_regions, 1, 1, 2)

        # adding background feature
        if bg_params is None:
            bg_grid = identity_grid.repeat(bs, 1, 1, 1, 1)
        else:
            bg_grid = identity_grid.repeat(bs, 1, 1, 1, 1)
            bg_grid = to_homogeneous(bg_grid)
            bg_grid = torch.matmul(bg_params.view(bs, 1, 1, 1, 3, 3),
                                   bg_grid.unsqueeze(-1)).squeeze(-1)
            bg_grid = from_homogeneous(bg_grid)

        sparse_motions = torch.cat([bg_grid, driving_to_source], dim=1)

        return sparse_motions
    def segment_motion(self, seg_target, seg_source):
        bs, _, h, w = seg_target['segmentation'].shape
        identity_grid = make_coordinate_grid((h, w),
                                             type=seg_source['shift'].type())
        identity_grid = identity_grid.view(1, 1, h, w, 2)
        coordinate_grid = identity_grid - seg_target['shift'].view(
            bs, self.num_segments, 1, 1, 2)
        if 'affine' in seg_target:
            affine = torch.matmul(seg_source['affine'],
                                  torch.inverse(seg_target['affine']))
            affine = affine.unsqueeze(-3).unsqueeze(-3)
            affine = affine.repeat(1, 1, h, w, 1, 1)
            coordinate_grid = torch.matmul(affine,
                                           coordinate_grid.unsqueeze(-1))
            coordinate_grid = coordinate_grid.squeeze(-1)

        target_to_source = coordinate_grid + seg_source['shift'].view(
            bs, self.num_segments, 1, 1, 2)

        # adding background feature
        identity_grid = identity_grid.repeat(bs, 1, 1, 1, 1)
        return torch.cat([identity_grid, target_to_source], dim=1)
def gaussian2kp(heatmap, kp_variance='matrix', clip_variance=None):
    """
    Extract the mean and the variance from a heatmap
    """
    shape = heatmap.shape
    # adding small eps to avoid 'nan' in variance
    heatmap = heatmap.unsqueeze(-1) + 1e-7
    grid = make_coordinate_grid(
        shape[3:], heatmap.type()).unsqueeze_(0).unsqueeze_(0).unsqueeze_(0)

    mean = (heatmap * grid).sum(dim=(3, 4))

    kp = {'mean': mean.permute(0, 2, 1, 3)}

    if kp_variance == 'matrix':
        mean_sub = grid - mean.unsqueeze(-2).unsqueeze(-2)
        var = torch.matmul(mean_sub.unsqueeze(-1), mean_sub.unsqueeze(-2))
        var = var * heatmap.unsqueeze(-1)
        var = var.sum(dim=(3, 4))
        var = var.permute(0, 2, 1, 3, 4)
        if clip_variance:
            min_norm = torch.tensor(clip_variance).type(var.type())
            sg = smallest_singular(var).unsqueeze(-1)
            var = torch.max(min_norm, sg) * var / sg
        kp['var'] = var

    elif kp_variance == 'single':
        mean_sub = grid - mean.unsqueeze(-2).unsqueeze(-2)
        var = mean_sub**2
        var = var * heatmap
        var = var.sum(dim=(3, 4))
        var = var.mean(dim=-1, keepdim=True)
        var = var.unsqueeze(-1)
        var = var.permute(0, 2, 1, 3, 4)
        kp['var'] = var

    return kp
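In the 'matrix' branch above, the per-keypoint variance is the heatmap-weighted covariance of the grid around the mean:

\Sigma_k = \sum_{z \in \mathcal{G}} H_k(z) \, (z - p_k)(z - p_k)^{\top}

computed as the outer product of mean_sub with itself, weighted by the heatmap and summed over the spatial axes; the smallest_singular clamp then keeps \Sigma_k from collapsing below clip_variance.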
Example #21
    def __init__(self, bs, **kwargs):
        # noise = np.random.normal(loc=0, scale=kwargs['sigma_affine'], size=(bs, 2, 3))
        noise = fluid.layers.Normal(loc=[0], scale=[kwargs['sigma_affine']
                                                    ]).sample([bs, 2, 3])
        noise = fluid.layers.reshape(noise, (bs, 2, 3))
        if TEST_MODE:
            logging.warning(
                'TEST MODE: Transform.noise == np.ones model.py:L135')
            noise = dygraph.to_variable(np.ones((bs, 2, 3)).astype(np.float32))

        self.theta = noise + fluid.layers.reshape(fluid.layers.eye(2, 3),
                                                  (1, 2, 3))
        self.bs = bs

        if ('sigma_tps' in kwargs) and ('points_tps' in kwargs):
            self.tps = True
            self.control_points = make_coordinate_grid(
                (kwargs['points_tps'], kwargs['points_tps']), 'float32')
            self.control_points = fluid.layers.unsqueeze(
                self.control_points, [0])
            if TEST_MODE:
                logging.warning(
                    'TEST MODE: Transform.control_params == np.ones model.py:L144'
                )
                self.control_params = dygraph.to_variable(
                    np.ones((bs, 1, kwargs['points_tps']**2)))
            else:
                buf = fluid.layers.Normal(loc=[0],
                                          scale=[kwargs['sigma_tps']]).sample(
                                              [bs, 1, kwargs['points_tps']**2])
                self.control_params = fluid.layers.reshape(
                    buf, (bs, 1, kwargs['points_tps']**2))
                # self.control_params = dygraph.to_variable(
                #     np.random.normal(loc=0, scale=kwargs['sigma_tps'], size=(bs, 1, kwargs['points_tps'] ** 2)))
        else:
            self.tps = False
    def forward(self, source_image, kp_driving, kp_source):
        if self.scale_factor != 1:
            source_image = F.interpolate(source_image,
                                         scale_factor=(1, self.scale_factor,
                                                       self.scale_factor))

        spatial_size = source_image.shape[3:]

        bs, _, _, h, w = source_image.shape
        _, d, num_kp, _ = kp_driving['mean'].shape

        inputs = []
        if self.use_heatmap:
            heatmap = self.normalize_heatmap(
                kp2gaussian(kp_driving,
                            spatial_size=spatial_size,
                            kp_variance=self.kp_variance))
            if self.heatmap_type == 'difference':
                heatmap_appearance = self.normalize_heatmap(
                    kp2gaussian(kp_source,
                                spatial_size=spatial_size,
                                kp_variance=self.kp_variance))
                heatmap = heatmap - heatmap_appearance
            if self.add_bg_feature_map:
                zeros = torch.zeros(bs, d, 1, h, w).type(heatmap.type())
                heatmap = torch.cat([zeros, heatmap], dim=2)
            heatmap = heatmap.unsqueeze(3)
            inputs.append(heatmap)

        num_kp += self.add_bg_feature_map
        if self.use_difference or self.use_deformed_source_image:
            kp_video_diff = kp_source['mean'] - kp_driving['mean']
            if self.add_bg_feature_map:
                zeros = torch.zeros(bs, d, 1, 2).type(kp_video_diff.type())
                kp_video_diff = torch.cat([zeros, kp_video_diff], dim=2)
            kp_video_diff = kp_video_diff.view(
                (bs, d, num_kp, 2, 1, 1)).repeat(1, 1, 1, 1, h, w)

        if self.use_difference:
            inputs.append(kp_video_diff)

        if self.use_deformed_source_image:
            appearance_repeat = source_image.unsqueeze(1).unsqueeze(1).repeat(
                1, d, num_kp, 1, 1, 1, 1)
            appearance_repeat = appearance_repeat.view(bs * d * num_kp, -1, h,
                                                       w)

            deformation_approx = kp_video_diff.view(
                (bs * d * num_kp, -1, h, w)).permute(0, 2, 3, 1)
            coordinate_grid = make_coordinate_grid(
                (h, w), type=deformation_approx.type())
            coordinate_grid = coordinate_grid.view(1, h, w, 2)
            deformation_approx = coordinate_grid + deformation_approx

            appearance_approx_deform = F.grid_sample(appearance_repeat,
                                                     deformation_approx)
            appearance_approx_deform = appearance_approx_deform.view(
                (bs, d, num_kp, -1, h, w))
            inputs.append(appearance_approx_deform)

        movement_encoding = torch.cat(inputs, dim=3)
        movement_encoding = movement_encoding.view(bs, d, -1, h, w)

        return movement_encoding.permute(0, 2, 1, 3, 4)
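Every snippet in this listing leans on a make_coordinate_grid helper that is never shown. A minimal PyTorch sketch consistent with how the torch variants call it (signature assumed; the Paddle variants pass a dtype string such as 'float32' instead of a tensor type):

import torch

def make_coordinate_grid(spatial_size, type='torch.FloatTensor'):
    """Return an (h, w, 2) grid of (x, y) coordinates normalized to [-1, 1]."""
    h, w = spatial_size
    x = torch.arange(w).type(type)
    y = torch.arange(h).type(type)
    x = 2 * (x / (w - 1)) - 1  # column index -> x in [-1, 1]
    y = 2 * (y / (h - 1)) - 1  # row index -> y in [-1, 1]
    yy = y.view(-1, 1).repeat(1, w)
    xx = x.view(1, -1).repeat(h, 1)
    return torch.cat([xx.unsqueeze(2), yy.unsqueeze(2)], dim=2)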