Esempio n. 1
0
def gaussian_coordinate_to_keypoint_map(yx_mean_stddev_corr,
                                        km_h,
                                        km_w,
                                        dtype=None):
    """Render 2-D Gaussian parameters as density maps on a km_h x km_w grid.

    :param yx_mean_stddev_corr: rank-3 tensor whose last axis holds, in order,
        y_mean, x_mean and optionally y_stddev, x_stddev, yx_corr.
        Missing trailing entries are filled in:
        with 2 values both stddevs become ``gaussian_2d_base_stddev``;
        with 3 values the single stddev is reused for both axes;
        the correlation defaults to 0.
    :param km_h: height of the output map.
    :param km_w: width of the output map.
    :param dtype: dtype of the coordinate grid; defaults to the dtype of the
        (completed) parameter tensor.
    :return: the Gaussian density evaluated on the grid and divided once by
        ``km_h * km_w``; presumably [batch_size, H, W, keypoint_num] -- the
        exact shape depends on ``gaussian2d_det`` / ``tmf.expand_dims``.
    """

    input_shape = tmf.get_shape(yx_mean_stddev_corr)
    assert len(input_shape) == 3, "wrong rank"

    # Complete the parameter vector to (y, x, sigma_y, sigma_x, corr).
    input_tensor_list = list()
    input_tensor_list.append(yx_mean_stddev_corr)
    if input_shape[2] < 3:
        # no stddev given: use the module-wide base stddev for both axes
        input_tensor_list.append(
            tf.ones(input_shape[:2] + [2], dtype=yx_mean_stddev_corr.dtype) *
            gaussian_2d_base_stddev)
    elif input_shape[2] < 4:
        # a single stddev given: duplicate it for the second axis
        input_tensor_list.append(yx_mean_stddev_corr[:, :, 2:3])
    if input_shape[2] < 5:
        # no correlation given: assume an axis-aligned Gaussian
        input_tensor_list.append(
            tf.zeros(input_shape[:2] + [1], dtype=yx_mean_stddev_corr.dtype))
    yx_mean_stddev_corr = tf.concat(input_tensor_list, axis=2)

    input_shape = tmf.get_shape(yx_mean_stddev_corr)
    assert input_shape[2] == 5, "wrong parameter number"

    if dtype is None:
        dtype = yx_mean_stddev_corr.dtype

    yx_map = yx_grid_map(km_h, km_w, dtype,
                         aspect_ratio=km_w / km_h)  # [1, H, W, 1, 2]

    p_map = tmf.expand_dims(yx_mean_stddev_corr, axis=1,
                            ndims=2)  # [batch_size, 1, 1, keypoint_num, 5]

    det_map = tmf.expand_dims(gaussian2d_det(yx_mean_stddev_corr),
                              axis=1,
                              ndims=2)

    yx_zm_map = yx_map - p_map[:, :, :, :, 0:2]  # y, x : zero mean
    yx_zm_map_2 = tf.square(yx_zm_map)
    m_map = p_map[:, :, :, :, 2:]  # sigma_y, sigma_x, corr_yx
    m_map_2 = tf.square(m_map)  # sigma_y^2, sigma_x^2, corr_yx^2

    # Exponent of the bivariate normal, with numerator and denominator both
    # multiplied by sigma_y^2 * sigma_x^2:
    #   -0.5 * (y^2 sx^2 + x^2 sy^2 - 2 corr y x sy sx)
    #        / ((1 - corr^2) sy^2 sx^2)
    u_numerator = (
        yx_zm_map_2[:, :, :, :, 0] * m_map_2[:, :, :, :, 1] +
        yx_zm_map_2[:, :, :, :, 1] * m_map_2[:, :, :, :, 0] -
        2. * tf.reduce_prod(yx_zm_map, axis=4) * tf.reduce_prod(m_map, axis=4))
    # m_map_2[..., 2] is already corr^2; squaring it again would give corr^4.
    # The denominator is kept negative (corr^2 - 1) so that the factor 0.5
    # below yields a non-positive exponent; epsilon keeps it away from zero.
    u_denominator = (m_map_2[:, :, :, :, 2] -
                     1.) * m_map_2[:, :, :, :, 0] * m_map_2[:, :, :, :,
                                                            1] - epsilon
    # Divide by the number of grid cells exactly once.
    keypoint_map = tmf.safe_exp(0.5 * (u_numerator / u_denominator)) / (
        (2. * math.pi * det_map + epsilon) * (km_h * km_w))

    return keypoint_map
Esempio n. 2
0
def yx_grid_map(h, w, dtype, aspect_ratio=1.):
    """Build the (y, x) coordinate grid of an h-by-w map.

    Row coordinates are multiplied by ``1 / sqrt(aspect_ratio)`` and column
    coordinates by ``sqrt(aspect_ratio)``.

    :param h: number of grid rows.
    :param w: number of grid columns.
    :param dtype: dtype forwarded to ``tmf.normalized_index_template``.
    :param aspect_ratio: ratio whose square root scales the two axes.
    :return: coordinate tensor, [1, H, W, 1, 2], last axis ordered (y, x).
    """
    sqrt_ratio = math.sqrt(aspect_ratio)
    y_scale, x_scale = 1 / sqrt_ratio, sqrt_ratio

    # row (y) coordinates, broadcastable along the width axis
    row_coords = tmf.normalized_index_template(
        h, dtype=dtype, offset=0.5) * y_scale
    row_coords = tmf.expand_dims(row_coords, axis=0, ndims=1)
    row_coords = tmf.expand_dims(row_coords, axis=-1, ndims=3)

    # column (x) coordinates, broadcastable along the height axis
    col_coords = tmf.normalized_index_template(
        w, dtype=dtype, offset=0.5) * x_scale
    col_coords = tmf.expand_dims(col_coords, axis=0, ndims=2)
    col_coords = tmf.expand_dims(col_coords, axis=-1, ndims=2)

    # replicate each axis over the other one and stack as (y, x)
    tiled_rows = tf.tile(row_coords, [1, 1, w, 1, 1])
    tiled_cols = tf.tile(col_coords, [1, h, 1, 1, 1])
    return tf.concat([tiled_rows, tiled_cols], axis=4)  # [1, H, W, 1, 2]
Esempio n. 3
0
    def decode_deterministic(self,
                             structure_param,
                             patch_features,
                             overall_features,
                             extra_inputs=None,
                             mos=None,
                             default_reuse=None):
        """Decode structure parameters (plus optional features) into an image.

        :param structure_param: input handed to ``self.structure2heatmap``.
        :param patch_features: optional per-structure features,
            [batch_size, struct_num, channels]; when given they are spread
            over the heatmaps and appended to the decoder input.
        :param overall_features: forwarded to
            ``self.feature2image_with_overall``; must be None unless
            ``self.allow_overall`` is set.
        :param extra_inputs: forwarded to ``structure2heatmap`` and to
            ``post_image_reconstruction``.
        :param mos: module output strip used to unwrap sub-module outputs;
            a new ``asu.ModuleOutputStrip`` is created when None.
        :param default_reuse: ``reuse`` flag of the "deterministic" scope.
        :return: tuple ``(image, mos)``.
        """
        if not self.allow_overall:
            assert overall_features is None, "Do not support overall_features"

        mos = asu.ModuleOutputStrip() if mos is None else mos

        with tf.variable_scope("deterministic", reuse=default_reuse):
            # structure -> one or several heatmaps
            heatmaps = mos(
                self.structure2heatmap(structure_param,
                                       extra_inputs=extra_inputs))
            if isinstance(heatmaps, (list, tuple)):
                heatmaps = list(heatmaps)
            else:
                heatmaps = [heatmaps]
            # only the first heatmap is exposed through the "save" outputs
            mos.extra_outputs["save"]["heatmap"] = heatmaps[0]
            merged_heatmap = tf.concat(heatmaps, axis=3)

            if patch_features is None:
                decoder_input = merged_heatmap
            else:
                # patch_features: [batch_size, struct_num, channels]
                # each heatmap:   [batch_size, h, w, struct_num]
                broadcast_features = tmf.expand_dims(patch_features,
                                                     axis=1,
                                                     ndims=2)
                # heatmap-weighted sum of the features over the structures
                spread_features = [
                    tf.reduce_sum(
                        broadcast_features *
                        tf.expand_dims(single_heatmap, axis=-1),
                        axis=3) for single_heatmap in heatmaps
                ]
                decoder_input = tf.concat(
                    [merged_heatmap,
                     tf.concat(spread_features, axis=3)],
                    axis=3)

            decoded = mos(
                self.feature2image_with_overall(decoder_input,
                                                overall_features))
            decoded = call_func_with_ignored_args(
                self.post_image_reconstruction,
                decoded,
                extra_inputs=extra_inputs)

        return decoded, mos
Esempio n. 4
0
def parse_landmark_condition_struct(keypoint_struct,
                                    img_size=None,
                                    full_img_size=None):
    """Extract landmark locations and gates from a condition struct.

    When ``img_size`` is given and differs from ``full_img_size``, the
    locations are rescaled about the centre 0.5 by the per-axis ratio
    ``img_size / full_img_size``.

    :param keypoint_struct: mapping providing the "location" and "gate"
        entries.
    :param img_size: optional (y, x) size; None disables the rescaling.
    :param full_img_size: (y, x) size to rescale against; it is indexed
        whenever ``img_size`` is not None and differs from it.
    :return: tuple ``(keypoint_param, keypoint_gate)``.
    """
    keypoint_param = keypoint_struct["location"]
    keypoint_gate = keypoint_struct["gate"]

    # canonicalize the location representation
    if img_size is not None and img_size != full_img_size:  # y,x
        y_factor = img_size[0] / full_img_size[0]
        x_factor = img_size[1] / full_img_size[1]
        keypoint_param = (keypoint_param - 0.5) * tmf.expand_dims(
            [y_factor, x_factor], axis=0, ndims=2) + 0.5

    # NOTE: no aspect-ratio renormalisation is applied to the locations; the
    # previously disabled code for it (and its unused local) was removed.
    return keypoint_param, keypoint_gate
Esempio n. 5
0
 def cyclic_clip(a):
     """Wrap coordinates cyclically into the (nh, nw) extent.

     Entries larger than the extent have it subtracted, negative entries
     have it added, everything else passes through unchanged. ``nh`` and
     ``nw`` are taken from the enclosing scope.
     """
     period = tmf.expand_dims([nh, nw],
                              axis=0,
                              ndims=len(tmf.get_shape(a)) - 1)
     overshoots = a > period
     shifted_down = a - period
     lifted = tf.where(a < 0, a + period, a)
     return tf.where(overshoots, shifted_down, lifted)
Esempio n. 6
0
def keypoint_map_to_gaussian_coordinate(keypoint_map,
                                        diag_cov=None,
                                        use_hard_max_as_anchors=None):
    """Summarise each keypoint channel by weighted 2-D Gaussian moments.

    :param keypoint_map: per-location weights, reshaped here from
        [batch_size, H, W, keypoint_num].
    :param diag_cov: when truthy the correlation is replaced by zeros
        (None means False).
    :param use_hard_max_as_anchors: when truthy the grid is re-centred
        cyclically around each channel's argmax before the moments are
        computed, and the mean is shifted back afterwards (None means False).
    :return: [batch_size, keypoint_num, 5]:
        y_mean, x_mean, y_stddev, x_stddev, yx_corr
    """
    diag_cov = False if diag_cov is None else diag_cov
    use_hard_max_as_anchors = (False if use_hard_max_as_anchors is None else
                               use_hard_max_as_anchors)

    map_shape = tmf.get_shape(keypoint_map)
    batch_size, km_h = map_shape[0], map_shape[1]
    km_w, keypoint_num = map_shape[2], map_shape[3]
    num_cells = km_h * km_w

    # extent of the normalised grid along y and x
    aspect = km_w / km_h
    extent_y = 1 / math.sqrt(aspect)
    extent_x = 1 * math.sqrt(aspect)

    def wrap_cyclic(coords):
        # wrap values outside [0, extent] back by one period
        period = tmf.expand_dims([extent_y, extent_x],
                                 axis=0,
                                 ndims=len(tmf.get_shape(coords)) - 1)
        return tf.where(coords > period, coords - period,
                        tf.where(coords < 0, coords + period, coords))

    grid = yx_grid_map(km_h, km_w, keypoint_map.dtype,
                       aspect_ratio=aspect)  # [1, H, W, 1, 2]
    grid = tf.reshape(grid, [1, num_cells, 1, 2])  # [1, H*W, 1, 2]

    weights = tf.reshape(
        keypoint_map,
        [batch_size, num_cells, keypoint_num, 1
         ])  # [batch_size, H*W, keypoint_num, 1]
    weight_total = tf.reduce_sum(
        weights, axis=1) + epsilon  # [batch_size, keypoint_num, 1]

    if use_hard_max_as_anchors:
        # location of the strongest response of every channel
        argmax_idx = tf.argmax(
            tf.squeeze(weights, axis=3), 1,
            output_type=tf.int64)  # [batch_size, keypoint_num]
        flat_grid = tf.reshape(grid, [num_cells, 2])  # [H*W, 2]
        argmax_yx = tf.gather(flat_grid,
                              argmax_idx)  # [batch_size, keypoint_num, 2]
        anchors = tf.expand_dims(argmax_yx,
                                 axis=1)  # [batch_size, 1, keypoint_num, 2]
        extent = tmf.expand_dims(tf.cast([extent_y, extent_x], weights.dtype),
                                 axis=0,
                                 ndims=3)
        # shift so that the anchor lands on the centre of the extent
        offset_map = anchors - extent * 0.5
        coords = wrap_cyclic(
            grid - offset_map)  # [batch_size, H*W, keypoint_num, 2]
    else:
        coords = grid

    # weighted mean
    yx_mean = tf.reduce_sum(
        weights * coords,
        axis=1) / weight_total  # [batch_size, keypoint_num, 2]

    # weighted second moments: var_y, var_x, cov_yx
    centered = coords - tf.expand_dims(yx_mean, axis=1)
    variance_terms = tf.square(centered)
    covariance_term = centered[:, :, :, 0:1] * centered[:, :, :, 1:2]
    moment_terms = tf.concat([variance_terms, covariance_term], axis=3)
    yx_sccov = tf.reduce_sum(
        moment_terms * weights,
        axis=1) / weight_total  # [batch_size, keypoint_num, 3]
    yx_stddev = tf.sqrt(yx_sccov[:, :, 0:2])
    stddev_product = (yx_stddev[:, :, 0] * yx_stddev[:, :, 1]) + epsilon
    yx_corr = tf.expand_dims(yx_sccov[:, :, 2] / stddev_product, axis=-1)

    if diag_cov:
        yx_corr = tf.zeros_like(yx_corr)

    if use_hard_max_as_anchors:
        # undo the anchor shift applied to the grid
        yx_mean = wrap_cyclic(yx_mean + tf.squeeze(offset_map, axis=1))

    # [batch_size, keypoint_num, 5]
    return tf.concat([yx_mean, yx_stddev, yx_corr], axis=2)
Esempio n. 7
0
    def augment_images(self, image_tensor):
        """Append a randomly warped copy of the batch for transform invariance.

        When ``self.use_random_transform()`` is false the input is returned
        unchanged. Otherwise the current model is run on the images to obtain
        keypoints, two thin-plate-spline (TPS) warps are built (one driven by
        those keypoints, one on a regular 7x7 grid), one of them is selected
        per sample, a random similarity transform (translation, rotation,
        scale, optional mirroring) is applied on top, and the warped images
        are concatenated after the originals along the batch axis.

        :param image_tensor: image batch; axes 1 and 2 are used as height
            and width.
        :return: tuple ``(images, aug_cache, None)``. ``aug_cache`` always
            holds "original_image"; with random transforms enabled it also
            holds "network_predefined", "lm_tps", "tps", "use_lm_tps" and
            "sim_transform", describing the applied warps.
        """

        if not hasattr(self, "target_size"):
            if hasattr(self, "input_size") and self.input_size is not None:
                self.target_size = self.input_size * 2

        # NOTE(review): shape indices 0 and 1 are passed here, while height
        # and width are read from indices 1 and 2 further down -- confirm
        # what self.image_size expects.
        actual_h, actual_w, full_h, full_w = \
            self.image_size(tmf.get_shape(image_tensor)[0], tmf.get_shape(image_tensor)[1])

        # random data augmentation for transformation invariance

        aug_cache = dict()
        aug_cache["original_image"] = image_tensor

        if not self.use_random_transform():
            return image_tensor, aug_cache, None

        batch_size = tmf.get_shape(image_tensor)[0]

        # get the landmarks using current model
        mos_tmp = asu.ModuleOutputStrip()
        with tgu.EnableAuxLoss(False):
            main_heatmap = self.input_to_heatmap(image_tensor, mos_tmp)
            main_keypoint_param = self.heatmap2structure_basic(main_heatmap)
        main_keypoint_param = main_keypoint_param[:, :, :2]
        del mos_tmp
        aug_cache[
            "network_predefined"] = True  # in the parent function reuse=True for network definition

        with tf.variable_scope("transform_invariance"):

            h = tmf.get_shape(image_tensor)[1]
            w = tmf.get_shape(image_tensor)[2]
            im = image_tensor
            im_shape = tmf.get_shape(im)

            # ---- RANDOM LANDMARK TPS TRANSFORM ----
            lm_n_points = tmf.get_shape(main_keypoint_param)[1]
            lm_rand_pt_std = 0.05  #0.1
            lm_tps_cp = tf.random_normal(shape=[batch_size, lm_n_points, 2],
                                         stddev=lm_rand_pt_std)
            lm_tps_cp *= np.sqrt(
                np.reshape([full_w / full_h, full_h / full_w], [1, 1, 2]))
            # remark: y,x: y enlarge normalized coordinate according to aspect ratio, x shrink normalized coordinate
            lm_tps_fp = self.coordinate_to_stn(main_keypoint_param,
                                               aspect_ratio=full_w / full_h)
            # the predicted keypoints only act as control points here
            lm_tps_fp = tf.stop_gradient(lm_tps_fp)

            im_t_1 = pt.wrap(im).spatial_transformer_tps(None,
                                                         None,
                                                         lm_tps_cp,
                                                         out_size=[h, w],
                                                         fp_more=lm_tps_fp)
            im_t_1 = tf.reshape(im_t_1, im_shape)

            aug_cache["lm_tps"] = dict()
            aug_cache["lm_tps"]["transform"] = lm_tps_cp
            aug_cache["lm_tps"]["control_points"] = lm_tps_fp
            aug_cache["lm_tps"]["num_points"] = lm_n_points

            # ---- RANDOM TPS TRANSFORM ----
            n_points = 7
            rand_pt_std = 0.1  # 0.2
            tps_transform = tf.random_normal(
                shape=[batch_size, n_points * n_points, 2], stddev=rand_pt_std)

            im_t_2 = pt.wrap(im).spatial_transformer_tps(
                n_points,
                n_points,
                tps_transform,
                out_size=[h, w],
            )
            im_t_2 = tf.reshape(im_t_2, im_shape)

            aug_cache["tps"] = dict()
            aug_cache["tps"]["transform"] = tps_transform
            aug_cache["tps"]["num_points"] = n_points

            # -------------- SELECT RANDOM TPS --------------------
            # Probability of choosing the landmark TPS: 0 up to step 5000,
            # then a linear ramp up to self.lm_tps_probability at step 10000.
            global_step = tf.train.get_global_step()
            lm_tps_step_lower = 5000
            lm_tps_step_upper = 10000
            lm_tps_random_upper_th = self.lm_tps_probability
            lm_tps_random_th = tf.where(
                global_step <= lm_tps_step_lower,
                tf.constant(0, dtype=tf.float32),
                tf.where(
                    global_step > lm_tps_step_upper,
                    tf.constant(1, dtype=tf.float32),
                    tf.to_float(global_step - lm_tps_step_lower) /
                    (lm_tps_step_upper - lm_tps_step_lower)) *
                lm_tps_random_upper_th)
            use_lm_tps = tf.random_uniform([batch_size]) < lm_tps_random_th
            # NOTE(review): this override makes the mask all-False, so the
            # grid TPS image (im_t_2) is always selected and the schedule
            # above has no effect -- confirm that this is intended.
            use_lm_tps = tf.zeros_like(use_lm_tps)
            im_t = tf.where(
                tf.tile(tmf.expand_dims(use_lm_tps, axis=-1, ndims=3),
                        [1] + im_shape[1:]), im_t_1, im_t_2)
            aug_cache["use_lm_tps"] = use_lm_tps

            # ---- RANDOM SIMILARITY TRANSFORM ----
            # generate random transformation and generate the image
            trans_range = np.array([-0.15, 0.15])  # translation
            rotation_std = 10  # degree
            scale_std = 1.25  # scale

            # canonicalize parameter range
            rotation_std = rotation_std / 180 * np.pi
            scale_std = np.log(scale_std)
            trans_range = trans_range * 2.  # spatial transformer use [-1, 1] for the coordinates

            # generate random transformation
            rand_base_t = tf.random_uniform(shape=[batch_size, 2, 1])
            rand_trans = rand_base_t * (
                trans_range[1] - trans_range[0]) + trans_range[0]  # trans x, y
            rand_rotation = tf.random_normal(
                shape=[batch_size, 1, 1]) * rotation_std
            rand_scale = tf.exp(
                tf.random_normal(shape=[batch_size, 1, 1]) * scale_std)

            # Mirroring multiplies one column of the rotation matrix by a
            # per-sample sign. The sign must be -1 or +1: a value of 0 would
            # zero the column and make the transform singular.
            if "keypoint_random_horizontal_mirroring" in self.options and \
                    self.options["keypoint_random_horizontal_mirroring"]:
                horizontal_sign = tf.to_float(
                    tf.random_uniform([batch_size, 1, 1]) > 0.5) * 2. - 1.
            else:
                horizontal_sign = 1.
            if "keypoint_random_vertical_mirroring" in self.options and \
                    self.options["keypoint_random_vertical_mirroring"]:
                # shape [batch_size, 1, 1] so it broadcasts against the
                # [batch_size, 2, 1] matrix column below
                vertical_sign = tf.to_float(
                    tf.random_uniform([batch_size, 1, 1]) > 0.5) * 2. - 1.
            else:
                vertical_sign = 1.

            # concatenate parameters
            rand_cos = tf.cos(rand_rotation)
            rand_sin = tf.sin(rand_rotation)
            rand_rot_matrix = tf.concat([
                tf.concat([rand_cos, rand_sin], axis=1) * horizontal_sign,
                tf.concat([-rand_sin, rand_cos], axis=1) * vertical_sign,
            ],
                                        axis=2)
            rand_sim_matrix = tf.concat(
                [rand_scale * rand_rot_matrix, rand_trans], axis=2)
            transform = rand_sim_matrix

            im_t = pt.wrap(im_t).spatial_transformer(tf.reshape(
                transform, [batch_size, 6]),
                                                     out_size=im_shape[1:3])
            im_t = tf.reshape(im_t, im_shape)

            aug_cache["sim_transform"] = transform

            # fuse converted images
            im_a = tf.concat([im, im_t], axis=0)

        return im_a, aug_cache, None
Esempio n. 8
0
    def cleanup_augmentation_structure(self,
                                       structure_param,
                                       aug_cache,
                                       condition_tensor=None):
        """Strip the augmented half of the batch and register auxiliary losses.

        With random transforms enabled, ``structure_param`` holds the
        keypoints of the original images followed by those of the warped
        copies. The warped keypoints are mapped back through the inverse
        transforms stored in ``aug_cache`` and compared with the original
        ones ("enc_transform" aux loss). During training, optional optical
        flow losses ("flow_trans", "flow_strength") are added as well.

        :param structure_param: keypoint parameters; the first two entries
            of the last axis are used as coordinates.
        :param aug_cache: cache filled by ``augment_images``.
        :param condition_tensor: optional iterable of condition dicts; the
            entry whose "type" is "optical_flow" supplies "offset" and
            "flow".
        :return: keypoint parameters of the original images, rescaled by
            ``target_input_size / input_size`` when ``target_input_size``
            is set.
        """

        actual_h, actual_w, full_h, full_w = self.image_size(
            tmf.get_shape(aug_cache["original_image"])[0],
            tmf.get_shape(aug_cache["original_image"])[1])
        full_a = full_w / full_h
        af_scaling = math.sqrt((actual_h / full_h) * (actual_w / full_w))

        if not self.use_random_transform():

            keypoint_param = structure_param
            batch_size = tmf.get_shape(structure_param)[0]

        else:

            with tf.variable_scope("transform_invariance"):

                lm_tps_cp = aug_cache["lm_tps"]["transform"]
                lm_tps_fp = aug_cache["lm_tps"]["control_points"]
                tps_transform = aug_cache["tps"]["transform"]
                tps_n_points = aug_cache["tps"]["num_points"]
                use_lm_tps = aug_cache["use_lm_tps"]
                transform = aug_cache["sim_transform"]

                # first half: original images, second half: warped copies
                batch_size = tmf.get_shape(structure_param)[0] // 2

                # transform keypoints and match keypoints
                keypoint_param2 = structure_param[batch_size:, :, :2]
                keypoint_param = structure_param[:batch_size, :, :2]

                # keypoint matching
                kp1 = self.coordinate_to_stn(keypoint_param,
                                             aspect_ratio=full_a)
                kp2 = self.coordinate_to_stn(keypoint_param2,
                                             aspect_ratio=full_a)
                # undo the similarity transform first, then the TPS warp
                # that was selected for each sample
                kp1h_from2 = (
                    pt.wrap(kp2).coordinate_inv_transformer(transform))
                kp1from2 = tf.where(
                    tf.tile(tmf.expand_dims(use_lm_tps, axis=-1, ndims=2),
                            [1] + tmf.get_shape(kp2)[1:]),
                    kp1h_from2.coordinate_inv_transformer_tps(
                        None, None, lm_tps_cp, fp_more=lm_tps_fp),
                    kp1h_from2.coordinate_inv_transformer_tps(
                        tps_n_points, tps_n_points, tps_transform))
                kp_diff_loss = tf.reduce_sum(
                    tf.reduce_sum(tf.square(kp1from2 - kp1), axis=[0, 1]) *
                    np.array([full_a, 1 / full_a])) / (af_scaling * batch_size)
                # remark: x,y: [-1,1]x[-1,1] --> [-aspect,+aspect]x[-1/aspect,+1/aspect], note the square
                transform_invariant_loss = self.options[
                    "keypoint_transform_loss_weight"] * kp_diff_loss
                tgu.add_to_aux_loss(transform_invariant_loss, "enc_transform")

        # optical flow
        of_condition = None
        if condition_tensor is not None:
            for v in condition_tensor:
                if v["type"] == "optical_flow":
                    of_condition = v

        optical_flow_transform_loss_weight = None
        if "optical_flow_transform_loss_weight" in self.options:
            optical_flow_transform_loss_weight = self.options[
                "optical_flow_transform_loss_weight"]

        # fall back to the keypoint transform weight when flow is available
        if optical_flow_transform_loss_weight is None:
            if of_condition is not None and "keypoint_transform_loss_weight" in self.options:
                optical_flow_transform_loss_weight = self.options[
                    "keypoint_transform_loss_weight"]

        optical_flow_strength_loss_weight = None
        if "optical_flow_strength_loss_weight" in self.options:
            optical_flow_strength_loss_weight = self.options[
                "optical_flow_strength_loss_weight"]

        if ptu.default_phase() == pt.Phase.train and \
                (rbool(optical_flow_transform_loss_weight) or rbool(optical_flow_strength_loss_weight)):

            assert of_condition is not None, "need optical flow condition"

            # coordinate before padding
            pre_keypoint_param = keypoint_param[:, :, :2]
            # Same guard as the final rescaling at the end of this method:
            # without a target input size there is nothing to rescale.
            if self.target_input_size is not None:
                scaling_factor = np.array(self.target_input_size) / np.array(
                    self.input_size)
                pre_keypoint_param = keypoints_2d.scale_keypoint_param(
                    pre_keypoint_param, scaling_factor, src_aspect_ratio=full_a)

            # only use valid
            ind_offset = tf.reshape(of_condition["offset"], [-1])
            flow_map = of_condition["flow"]  # [batch_size, h, w, 2]
            valid_mask = tf.not_equal(ind_offset, 0)

            # interpolation mask
            flow_h, flow_w = tmf.get_shape(flow_map)[1:3]

            if rbool(optical_flow_transform_loss_weight):

                # narrow Gaussian around each keypoint, used as
                # interpolation weights to read the flow at that keypoint
                pre_interp_weights = keypoints_2d.gaussian_coordinate_to_keypoint_map(
                    tf.concat([
                        pre_keypoint_param,
                        tf.ones_like(pre_keypoint_param[:, :, -1:]) /
                        math.sqrt(flow_h * flow_w)
                    ],
                              axis=2),
                    km_h=flow_h,
                    km_w=flow_w)  # [batch_size, h, w, keypoint_num]
                pre_interp_weights /= tf.reduce_sum(
                    pre_interp_weights, axis=[1, 2
                                              ], keep_dims=True) + tmf.epsilon

                # pointwise flow
                next_ind = np.arange(batch_size) + ind_offset
                next_keypoint_param = tf.gather(pre_keypoint_param, next_ind)
                pointwise_flow = tf.reduce_sum(
                    tf.expand_dims(flow_map, axis=3) *
                    tf.expand_dims(pre_interp_weights, axis=4),
                    axis=[1, 2])

                # flow transform constraint
                next_keypoint_param_2 = pre_keypoint_param + pointwise_flow
                kp_of_trans_loss = tf.reduce_mean(
                    tf.boolean_mask(tmf.sum_per_sample(
                        tf.square(next_keypoint_param_2 -
                                  next_keypoint_param)),
                                    mask=valid_mask))
                optical_flow_transform_loss = kp_of_trans_loss * optical_flow_transform_loss_weight
                tgu.add_to_aux_loss(optical_flow_transform_loss, "flow_trans")

            if rbool(optical_flow_strength_loss_weight):

                pre_interp_weights = keypoints_2d.gaussian_coordinate_to_keypoint_map(
                    tf.concat(
                        [
                            pre_keypoint_param,
                            tf.ones_like(pre_keypoint_param[:, :, -1:]) *
                            (1 / 16)  #self.base_gaussian_stddev
                        ],
                        axis=2),
                    km_h=flow_h,
                    km_w=flow_w)  # [batch_size, h, w, keypoint_num]
                pre_interp_weights /= tf.reduce_sum(
                    pre_interp_weights, axis=[1, 2
                                              ], keep_dims=True) + tmf.epsilon

                # flow magnitude weighted by the keypoint maps
                kp_of_strength_loss = tf.reduce_mean(
                    tmf.sum_per_sample(
                        tf.boolean_mask(pre_interp_weights, mask=valid_mask) *
                        tf.sqrt(
                            tf.reduce_sum(tf.square(
                                tf.boolean_mask(flow_map, mask=valid_mask)),
                                          axis=3,
                                          keep_dims=True))))
                # negated: minimising the loss maximises the weighted flow
                kp_of_strength_loss = -kp_of_strength_loss
                optical_flow_strength_loss = kp_of_strength_loss * optical_flow_strength_loss_weight
                tgu.add_to_aux_loss(optical_flow_strength_loss,
                                    "flow_strength")

        # scale the parameters based on the padding ------
        if self.target_input_size is not None:
            assert self.input_size is not None, "self.input_size must be specified if self.target_input_size"
            scaling_factor = np.array(self.target_input_size) / np.array(
                self.input_size)
            keypoint_param = keypoints_2d.scale_keypoint_param(
                keypoint_param, scaling_factor, src_aspect_ratio=full_a)

        return keypoint_param