def heatmap2structure_internal(self, heatmap_tensor):
    keypoint_map = heatmap_tensor[:, :, :, :-1]  # drop the background channel
    # convert keypoint maps to Gaussian coordinate parameters
    keypoint_param = keypoints_2d.keypoint_map_to_gaussian_coordinate(
        keypoint_map,
        use_hard_max_as_anchors=(
            self.options["use_hard_max_as_anchors"]
            if "use_hard_max_as_anchors" in self.options else None))
    # Remark: keypoint_param has been scaled according to the aspect ratio
    batch_size = tmf.get_shape(keypoint_map)[0]
    keypoint_prob = tf.ones(
        [batch_size, tmf.get_shape(keypoint_map)[3]],
        dtype=keypoint_map.dtype)
    # average the two stddev components to get an isotropic Gaussian
    keypoint_param = tf.concat([
        keypoint_param[:, :, :2],
        tf.reduce_mean(keypoint_param[:, :, 2:4], axis=2, keep_dims=True)
    ], axis=2)
    return keypoint_param, keypoint_prob
def _detailedgrid(x_t_flat, y_t_flat, fp):
    x_t_flat_b = tf.expand_dims(x_t_flat, axis=1)  # [1 or n, 1, h*w], h*w == num_points
    y_t_flat_b = tf.expand_dims(y_t_flat, axis=1)  # [1 or n, 1, h*w]
    p_batch_size = tmf.get_shape(x_t_flat)[0]
    num_batch = tmf.get_shape(fp)[0]
    if p_batch_size == 1:
        x_t_flat_g = tf.tile(x_t_flat_b, tf.stack([num_batch, 1, 1]))  # [n, 1, h*w]
        y_t_flat_g = tf.tile(y_t_flat_b, tf.stack([num_batch, 1, 1]))  # [n, 1, h*w]
    else:
        x_t_flat_g = x_t_flat_b
        y_t_flat_g = y_t_flat_b
        assert num_batch == p_batch_size, "batch sizes do not match"
    px = tf.expand_dims(fp[:, :, 0], 2)  # [n, nx*ny, 1]
    py = tf.expand_dims(fp[:, :, 1], 2)  # [n, nx*ny, 1]
    # pairwise point-to-control-point distances, broadcast to [n, nx*ny, h*w]
    d = tf.sqrt(tf.pow(x_t_flat_b - px, 2.) + tf.pow(y_t_flat_b - py, 2.))
    r = tf.pow(d, 2) * tf.log(d + 1e-6)  # TPS kernel r^2 * log(r), [n, nx*ny, h*w]
    ones = tf.ones_like(x_t_flat_g)  # [n, 1, h*w]
    grid = tf.concat([ones, x_t_flat_g, y_t_flat_g, r], 1)  # [n, nx*ny+3, h*w]
    return grid
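# Hedged sketch (not repo code): a NumPy mock-up of the grid that _detailedgrid
# builds, to make the [n, nx*ny+3, h*w] layout concrete. The toy sizes are
# illustrative; the r = d^2 * log(d + 1e-6) thin-plate-spline kernel and the
# [ones; x; y; r] stacking follow the function above.
import numpy as np

n, nx, ny, h, w = 2, 3, 3, 4, 4
x_t = np.random.rand(1, h * w)                 # target x coords, [1, h*w]
y_t = np.random.rand(1, h * w)                 # target y coords, [1, h*w]
fp = np.random.rand(n, nx * ny, 2)             # control points, [n, nx*ny, 2]

px = fp[:, :, 0:1]                             # [n, nx*ny, 1]
py = fp[:, :, 1:2]
d = np.sqrt((x_t[:, None, :] - px) ** 2 + (y_t[:, None, :] - py) ** 2)
r = d ** 2 * np.log(d + 1e-6)                  # TPS radial basis, [n, nx*ny, h*w]
ones = np.ones((n, 1, h * w))
x_g = np.tile(x_t[:, None, :], (n, 1, 1))
y_g = np.tile(y_t[:, None, :], (n, 1, 1))
grid = np.concatenate([ones, x_g, y_g, r], axis=1)
assert grid.shape == (n, nx * ny + 3, h * w)   # 3 affine rows + one row per control point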
def gaussian2d_axis_balancing(p, no_mean_input=False):
    assert len(tmf.get_shape(p)) == 3, "wrong rank"
    if not no_mean_input:
        p = p[:, :, 2:]  # drop the (y, x) mean entries
    p_shape = tmf.get_shape(p)
    if p_shape[-1] == 0:
        # b = tf.ones(p_shape[:-1] + [1], dtype=p.dtype) * math.log(epsilon)
        b = tf.constant(0, dtype=p.dtype)
    else:
        p2 = tf.square(p)
        if p_shape[-1] == 1:
            x = p2[:, :, 0]
            y = x
            q = 0.
        elif p_shape[-1] == 2:
            x = p2[:, :, 0]
            y = p2[:, :, 1]
            q = 0.
        elif p_shape[-1] == 3:
            x = p2[:, :, 0]
            y = p2[:, :, 1]
            q = p2[:, :, 2]
        else:
            raise ValueError("too many parameters")
        x_plus_y = x + y
        # b = tf.log(tf.square(x_plus_y) + 4.*(q-1.)*x*y + epsilon) - 2.*tf.log(x_plus_y + epsilon)
        b = 1. + (4. * (q - 1.) * x * y) / tf.square(x_plus_y)
    return b
def recon_and_nll_subnet(self, samples, data_tensor, enc_extra_outputs=None, **kwargs):
    sample_num = tmf.get_shape(samples)[0] // tmf.get_shape(data_tensor)[0]
    dec_extra_kwargs = dict()
    dec_extra_kwargs["extra_inputs"] = dict()
    if enc_extra_outputs is not None and "for_decoder" in enc_extra_outputs:
        dec_extra_kwargs["extra_inputs"] = enc_extra_outputs["for_decoder"]
    dec_extra_kwargs["extra_inputs"]["data"] = data_tensor
    reconstructed, dec_extra_outputs = self.decoding_subnet(
        samples, **dec_extra_kwargs, **kwargs)
    if dec_extra_outputs.get("recon_batch_size") is not None:
        recon_batch_size = dec_extra_outputs["recon_batch_size"]
    else:
        recon_batch_size = tmf.get_shape(reconstructed)[0]
    elt_recon_nll = self.recon_subnet(
        reconstructed[:recon_batch_size],
        tmf.rep_sample(data_tensor, sample_num))
    recon_nll = tmf.sum_per_sample(elt_recon_nll)
    total_recon_nll = recon_nll
    return reconstructed, total_recon_nll, recon_nll, dec_extra_outputs
def reshape_extended_features(a, param_factor):
    if a is None:
        return None, None
    # split the last axis into [..., channels // param_factor, param_factor]
    kept_shape = tmf.get_shape(a)[:-1] + [tmf.get_shape(a)[-1] // param_factor]
    a = tf.reshape(a, kept_shape + [param_factor])
    # b keeps only the first ("main") entry of each parameter group
    main_chooser = (slice(None, None),) * len(kept_shape) + (0,)
    b = a[main_chooser]
    return a, b
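# Hedged NumPy analogue (illustrative, not repo code): the split performed by
# reshape_extended_features for a toy tensor with param_factor=2; the last axis
# is unpacked into parameter groups, and `b` keeps each group's first entry.
import numpy as np

a = np.arange(2 * 3 * 4).reshape(2, 3, 4)      # last dim 4 = 2 channels * factor 2
param_factor = 2
a_split = a.reshape(2, 3, 4 // param_factor, param_factor)
b_main = a_split[..., 0]                       # main parameter of each group
assert a_split.shape == (2, 3, 2, 2) and b_main.shape == (2, 3, 2)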
def recon_subnet(self, reconstructed, data_tensor):
    # replicate the ground truth to match the number of reconstruction samples
    rep_num = tmf.get_shape(reconstructed)[0] // tmf.get_shape(data_tensor)[0]
    data_tensor = tmf.rep_sample(data_tensor, rep_num)
    with tf.variable_scope("recon"):
        elt_recon_loss = self._recon_factory()(reconstructed, data_tensor)
    return elt_recon_loss
def keypoint_map_depth_normalization_with_fake_bg(keypoint_map):
    # assume a uniform background probability of 1 / (H * W)
    bg_prob = 1. / (tmf.get_shape(keypoint_map)[1] * tmf.get_shape(keypoint_map)[2])
    keypoint_map_z = tf.reduce_sum(keypoint_map, axis=3, keep_dims=True) + bg_prob
    keypoint_map /= keypoint_map_z
    normalized_bg_prob = bg_prob / tf.squeeze(keypoint_map_z, axis=3)
    return keypoint_map, normalized_bg_prob
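# Hedged NumPy analogue (illustrative): after this fake-background normalization,
# the keypoint channels plus the background probability sum to 1 at every pixel.
import numpy as np

h, w, k = 4, 4, 3
km = np.random.rand(1, h, w, k)                # unnormalized keypoint maps
bg = 1. / (h * w)                              # uniform fake-background mass
z = km.sum(axis=3, keepdims=True) + bg
km_n = km / z
bg_n = bg / z[..., 0]
assert np.allclose(km_n.sum(axis=3) + bg_n, 1.0)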
def gaussian_coordinate_to_keypoint_map(yx_mean_stddev_corr, km_h, km_w, dtype=None):
    input_shape = tmf.get_shape(yx_mean_stddev_corr)
    assert len(input_shape) == 3, "wrong rank"
    # pad missing parameters: add default stddevs, then a zero correlation
    input_tensor_list = list()
    input_tensor_list.append(yx_mean_stddev_corr)
    if input_shape[2] < 3:
        input_tensor_list.append(
            tf.ones(input_shape[:2] + [2], dtype=yx_mean_stddev_corr.dtype) *
            gaussian_2d_base_stddev)
    elif input_shape[2] < 4:
        # isotropic case: duplicate the single stddev
        input_tensor_list.append(yx_mean_stddev_corr[:, :, 2:3])
    if input_shape[2] < 5:
        input_tensor_list.append(
            tf.zeros(input_shape[:2] + [1], dtype=yx_mean_stddev_corr.dtype))
    yx_mean_stddev_corr = tf.concat(input_tensor_list, axis=2)
    input_shape = tmf.get_shape(yx_mean_stddev_corr)
    assert input_shape[2] == 5, "wrong parameter number"
    if dtype is None:
        dtype = yx_mean_stddev_corr.dtype

    yx_map = yx_grid_map(km_h, km_w, dtype, aspect_ratio=km_w / km_h)  # [1, H, W, 1, 2]
    p_map = tmf.expand_dims(
        yx_mean_stddev_corr, axis=1, ndims=2)  # [batch_size, 1, 1, keypoint_num, 5]
    det_map = tmf.expand_dims(
        gaussian2d_det(yx_mean_stddev_corr), axis=1, ndims=2)
    yx_zm_map = yx_map - p_map[:, :, :, :, 0:2]  # (y, x), zero-mean
    yx_zm_map_2 = tf.square(yx_zm_map)
    m_map = p_map[:, :, :, :, 2:]  # sigma_y, sigma_x, corr_yx
    m_map_2 = tf.square(m_map)
    # quadratic form of the 2D Gaussian exponent, scaled by sigma_y^2 * sigma_x^2
    u_numerator = (
        yx_zm_map_2[:, :, :, :, 0] * m_map_2[:, :, :, :, 1] +
        yx_zm_map_2[:, :, :, :, 1] * m_map_2[:, :, :, :, 0] -
        2. * tf.reduce_prod(yx_zm_map, axis=4) * tf.reduce_prod(m_map, axis=4))
    u_denominator = (
        (tf.square(m_map_2[:, :, :, :, 2]) - 1.) *
        m_map_2[:, :, :, :, 0] * m_map_2[:, :, :, :, 1] - epsilon)
    keypoint_map = tmf.safe_exp(0.5 * (u_numerator / u_denominator)) / (
        (2. * math.pi * det_map + epsilon) * (km_h * km_w))
    keypoint_map /= km_h * km_w  # normalize to probability mass
    return keypoint_map
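# Hedged NumPy illustration (not the repo implementation): the effect of
# gaussian_coordinate_to_keypoint_map for a single isotropic keypoint, namely a
# Gaussian density over [0, 1]^2 divided by the cell count so the map holds
# per-cell probability mass. `sigma` stands in for gaussian_2d_base_stddev;
# the tolerance absorbs discretization and boundary truncation.
import numpy as np

km_h, km_w, sigma = 16, 16, 0.1
ys = (np.arange(km_h) + 0.5) / km_h            # cell-center coordinates
xs = (np.arange(km_w) + 0.5) / km_w
yy, xx = np.meshgrid(ys, xs, indexing="ij")
mu_y, mu_x = 0.5, 0.25
density = np.exp(-0.5 * ((yy - mu_y) ** 2 + (xx - mu_x) ** 2) / sigma ** 2) / (
    2. * np.pi * sigma ** 2)
mass = density / (km_h * km_w)                 # per-cell probability mass
assert abs(mass.sum() - 1.0) < 0.05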
def _transform_xy(T, fp, x, y):
    assert len(tmf.get_shape(x)) == 2 and tmf.get_shape(x) == tmf.get_shape(y), \
        "x and y must be rank 2 and of the same size"
    grid = TPS._detailedgrid(x, y, fp)
    x_s_flat, y_s_flat = TPS._transform_internal(T, grid)
    x_s = tf.reshape(x_s_flat, tmf.get_shape(x))
    y_s = tf.reshape(y_s_flat, tmf.get_shape(y))
    return x_s, y_s
def visible_dist(self, input_tensor):
    s = tmf.get_shape(input_tensor)
    latent_dim = np.prod(s[1:]) // self.param_num()
    param_tensor = self.output_dist.transform2param(input_tensor, latent_dim)
    dist_param = self.output_dist.parametrize(param_tensor, latent_dim)
    return dist_param, param_tensor
def rep_to_batch_size(x):
    # note: batch_size is a free variable captured from the enclosing scope
    # (see _decoding_subnet below)
    x_batch_size = tmf.get_shape(x)[0]
    if batch_size == x_batch_size:
        return x
    rep_factor = batch_size // x_batch_size
    assert rep_factor == batch_size / x_batch_size, "sample num factor is not integer"
    return tmf.rep_sample(x, rep_factor)
def real_to_gaussian2dparam(r):
    assert len(tmf.get_shape(r)) == 3, "wrong rank"
    param_num = tmf.get_shape(r)[-1]
    assert 2 <= param_num <= 5, "wrong param number"
    tensor_list = list()
    tensor_list.append(tf.nn.sigmoid(r[:, :, 0:2]))  # means, squashed to (0, 1)
    if param_num >= 4:
        # two stddevs, made positive and scaled by the base stddev
        tensor_list.append(
            tmf.atanh_sigmoid(r[:, :, 2:4]) * gaussian_2d_base_stddev)
        if param_num == 5:
            tensor_list.append(tf.nn.tanh(r[:, :, 4:5]))  # correlation in (-1, 1)
    else:
        # single (isotropic) stddev
        tensor_list.append(
            tmf.atanh_sigmoid(r[:, :, 2:3]) * gaussian_2d_base_stddev)
    return tf.concat(tensor_list, axis=2)
def gaussian2dparam_to_recon_code(p):
    assert len(tmf.get_shape(p)) == 3, "wrong rank"
    param_num = tmf.get_shape(p)[-1]
    assert 2 <= param_num <= 5, "wrong param number"
    tensor_list = list()
    tensor_list.append(p[:, :, :2])  # means pass through unchanged
    if param_num >= 4:
        # stddevs, mapped to log scale relative to the base stddev
        tensor_list.append(
            tf.log(p[:, :, 2:4] / gaussian_2d_base_stddev + epsilon))
        if param_num == 5:
            tensor_list.append(tmf.atanh(p[:, :, 4:5]))  # correlation, unsquashed
    else:
        tensor_list.append(
            tf.log(p[:, :, 2:3] / gaussian_2d_base_stddev + epsilon))
    return tf.concat(tensor_list, axis=2)
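# Hedged NumPy mock-up (assumes tmf.atanh is arctanh, with illustrative values
# for gaussian_2d_base_stddev and epsilon): how one keypoint's parameter row
# [mu_y, mu_x, sd_y, sd_x, corr] becomes an unconstrained recon code. Means
# pass through, stddevs move to log scale relative to the base stddev, and the
# correlation goes through arctanh.
import numpy as np

base, eps = 0.1, 1e-6                          # illustrative stand-ins
p = np.array([0.5, 0.25, 0.2, 0.1, 0.3])
code = np.concatenate([
    p[:2],                                     # means: identity
    np.log(p[2:4] / base + eps),               # stddevs: log relative to base
    np.arctanh(p[4:5]),                        # correlation: inverse tanh
])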
def gaussian2d_det(p, no_mean_input=False):
    assert len(tmf.get_shape(p)) == 3, "wrong rank"
    if not no_mean_input:
        p = p[:, :, 2:]  # drop the (y, x) mean entries
    p_shape = tmf.get_shape(p)
    if p_shape[-1] == 0:
        # no stddev given: fall back to the base stddev
        d = tf.ones(p_shape[:-1] + [1], dtype=p.dtype) * math.pow(
            gaussian_2d_base_stddev, 4.)
    elif p_shape[-1] == 1:
        d = tf.pow(p[:, :, 0], 4.)  # isotropic: sigma^4
    elif p_shape[-1] == 2:
        d = tf.square(p[:, :, 0] * p[:, :, 1])
    elif p_shape[-1] == 3:
        d = (1. - tf.square(p[:, :, 2])) * tf.square(p[:, :, 0] * p[:, :, 1])
    else:
        raise ValueError("too many parameters")
    return d
def gaussian2d_exp_entropy(p, no_mean_input=False, stddev_scaling=1.):
    assert len(tmf.get_shape(p)) == 3, "wrong rank"
    if not no_mean_input:
        p = p[:, :, 2:]
    p_shape = tmf.get_shape(p)
    z = math.exp(math.log(2 * math.pi) + 1.)  # 2 * pi * e
    if p_shape[-1] == 0:
        d = tf.ones(p_shape[:-1] + [1], dtype=p.dtype) * \
            (z * math.exp(2. * math.log(gaussian_2d_base_stddev)))
    elif p_shape[-1] == 1:
        d = z * tf.square(p[:, :, 0])
    elif p_shape[-1] == 2:
        d = z * p[:, :, 0] * p[:, :, 1]
    elif p_shape[-1] == 3:
        d = z * p[:, :, 0] * p[:, :, 1] * tf.sqrt(1. - tf.square(p[:, :, 2]))
    else:
        raise ValueError("too many parameters")
    return d / (stddev_scaling ** 2)
def latent2structure_patch_overall_generic(self, latent_tensor):
    keypoint_num = self.options["keypoint_num"]
    batch_size = tmf.get_shape(latent_tensor)[0]
    total_dim = tmf.get_shape(latent_tensor)[1]
    cur_dim = 0
    # latent layout: [keypoint params | patch features | overall features]
    keypoint_param_dim = self.structure_param_num * keypoint_num
    keypoint_tensor = latent_tensor[:, :keypoint_param_dim]
    keypoint_param = tf.reshape(keypoint_tensor, [batch_size, keypoint_num, -1])
    cur_dim += keypoint_param_dim
    if self.patch_feature_dim is not None and self.patch_feature_dim > 0:
        all_patch_feat_dims = (keypoint_num + 1) * self.patch_feature_dim
        patch_tensor = latent_tensor[
            :, keypoint_param_dim:keypoint_param_dim + all_patch_feat_dims]
        patch_features = tf.reshape(patch_tensor, [
            batch_size,
            keypoint_num + (1 if self.use_background_feature else 0),
            self.patch_feature_dim
        ])
        cur_dim += all_patch_feat_dims
    else:
        patch_features = None
    if total_dim > cur_dim:
        overall_features = latent_tensor[:, cur_dim:]
        if self.overall_feature_dim is None or \
                self.overall_feature_dim + cur_dim < total_dim:
            warnings.warn("mismatched overall feature dim specification")
    else:
        overall_features = None
    if not self.use_background_feature:
        # set background features to zeros
        patch_features = tf.concat([
            patch_features[:, :-1, :],
            tf.zeros_like(patch_features[:, -1:, :])
        ], axis=1)
    return keypoint_param, patch_features, overall_features, None
def _decoding_subnet(self, samples, condition_tensor=None, options=None, extra_inputs=None):
    if condition_tensor is not None:
        batch_size = tmf.get_shape(samples)[0]

        def rep_to_batch_size(x):
            # replicate x along the batch axis to match the sample batch size
            x_batch_size = tmf.get_shape(x)[0]
            if batch_size == x_batch_size:
                return x
            rep_factor = batch_size // x_batch_size
            assert rep_factor == batch_size / x_batch_size, \
                "sample num factor is not integer"
            return tmf.rep_sample(x, rep_factor)

        condition_tensor = recursive_apply(
            tmf.is_tf_data, rep_to_batch_size, condition_tensor)
    output_param_num = self._recon_factory().param_num()
    factory_arg_prefix = ["decoder"]
    if condition_tensor is not None:
        if hasattr(self.opt, "condition_at_latent") and \
                self.opt.condition_at_latent is not None:
            factory_arg_prefix = [
                "cond_decoder",
                "generic_" + self.opt.condition_at_latent + "_at_begin"
            ]
    dec_factory = call_func_with_ignored_args(
        net_factory, *factory_arg_prefix, self.opt.decoder_name,
        output_param_num, options=options)
    reconstructed = call_func_with_ignored_args(
        dec_factory, samples,
        condition_tensor=condition_tensor, extra_inputs=extra_inputs)
    default_extra_outputs = dict()
    default_extra_outputs["save"] = dict()
    default_extra_outputs["extra_recon"] = dict()
    default_extra_outputs["cond"] = dict()
    if isinstance(reconstructed, (tuple, list)):
        extra_outputs = reconstructed[1]
        reconstructed = reconstructed[0]
    else:
        extra_outputs = dict()
    extra_outputs = {**default_extra_outputs, **extra_outputs}
    return reconstructed, extra_outputs
def transform2param(cls, input_tensor, latent_dim):
    """Create network for converting input_tensor to distribution parameters

    :param input_tensor: (posterior phase) input tensor for the posterior
    :param latent_dim: dimension of the latent variables
    :return: param_tensor - distribution parameters
    """
    assert tmf.get_shape(input_tensor)[1] == latent_dim, "wrong dim"
    param_tensor = input_tensor
    return param_tensor
def gaussian2d_entropy(p, no_mean_input=False):
    assert len(tmf.get_shape(p)) == 3, "wrong rank"
    if not no_mean_input:
        p = p[:, :, 2:]
    p_shape = tmf.get_shape(p)
    z = math.log(2 * math.pi) + 1.  # log(2 * pi * e)
    if p_shape[-1] == 0:
        d = tf.ones(p_shape[:-1] + [1], dtype=p.dtype) * \
            (z + 2. * math.log(gaussian_2d_base_stddev))
    elif p_shape[-1] == 1:
        d = z + 2. * tf.log(p[:, :, 0] + epsilon)
    elif p_shape[-1] == 2:
        d = z + (tf.log(p[:, :, 0] + epsilon) + tf.log(p[:, :, 1] + epsilon))
    elif p_shape[-1] == 3:
        d = z + (0.5 * tf.log(1. - tf.square(p[:, :, 2]) + epsilon) +
                 tf.log(p[:, :, 0] + epsilon) + tf.log(p[:, :, 1] + epsilon))
    else:
        raise ValueError("too many parameters")
    return d
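# Hedged consistency check in plain Python (illustrative, epsilon terms ignored):
# for the 3-parameter case [sd_y, sd_x, corr], gaussian2d_exp_entropy above
# should equal exp(gaussian2d_entropy), i.e. 2*pi*e * sd_y * sd_x * sqrt(1 - corr^2).
import math

sd_y, sd_x, corr = 0.2, 0.1, 0.3
entropy = (math.log(2 * math.pi) + 1.
           + 0.5 * math.log(1. - corr ** 2)
           + math.log(sd_y) + math.log(sd_x))
exp_entropy = (2. * math.pi * math.e) * sd_y * sd_x * math.sqrt(1. - corr ** 2)
assert abs(math.exp(entropy) - exp_entropy) < 1e-9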
def parametrize(cls, param_tensor, latent_dim):
    """Create network for converting param_tensor to a parameter dictionary

    :param param_tensor: (posterior phase) input tensor for the posterior
    :param latent_dim: dimension of the latent variables
    :return: dist_param - distribution parameters
    """
    assert tmf.get_shape(param_tensor)[1] == latent_dim, "wrong dim"
    dist_param = cls.param_dict(mean=param_tensor)
    return dist_param
def __call__(self, input_tensor, condition_tensor=None, extra_inputs=None):
    """Create encoder network."""
    latent_tensor, mos = self.patch_structure_overall_encode(
        input_tensor,
        condition_tensor=condition_tensor,
        extra_inputs=extra_inputs)
    assert self.output_channels == tmf.get_shape(latent_tensor)[1], \
        "wrong output_channels"
    return latent_tensor, mos.extra_outputs
def __call__(self, input_tensor, gt_tensor):
    input_tensor, input_shape = self.flatten_dist_tensor(input_tensor)
    gt_s = tmf.get_shape(gt_tensor)
    gt_tensor = tf.reshape(gt_tensor, [gt_s[0], np.prod(gt_s[1:])])
    dist_param, _ = self.visible_dist(input_tensor)
    nll, _ = self.output_dist.nll(dist_param, gt_tensor)
    nll = tf.reshape(nll, input_shape)
    return nll
def coordinate_inv_transformer_tps(input_layer, nx, ny, cp, fp_more=None, name=PROVIDED):
    input_shape = tmf.get_shape(input_layer.tensor)
    assert len(input_shape) == 3, "input tensor must be rank 3"
    p = input_layer.tensor
    with tf.variable_scope(name):
        output = TPS_TRANSFORM(nx, ny, cp, p, fp_more=fp_more)
    return output
def bg_feature2image(self, bg_feature):
    batch_size = tmf.get_shape(bg_feature)[0]
    with pt.defaults_scope(**self.pt_defaults_scope_value()):
        # spatial resolution after each layer is noted on the right
        return (pt.wrap(bg_feature).
                conv2d(3, 512).               # 2
                deconv2d(3, 512, stride=2).   # 4
                deconv2d(3, 256, stride=2).   # 8
                deconv2d(3, 256, stride=2).   # 16
                deconv2d(3, 128, stride=2).   # 32
                deconv2d(3, 64, stride=2).    # 64
                deconv2d(3, 32, stride=2).    # 128
                conv2d(3, 3 * self.recon_dist_param_num,
                       activation_fn=None).tensor)
def flatten_dist_tensor(self, dist_tensor):
    s = tmf.get_shape(dist_tensor)
    total_hidden = np.prod(s[1:])
    input_tensor = tf.reshape(
        dist_tensor,
        [s[0], total_hidden // self.param_num(), self.param_num()])
    # move the parameter channel in front of the geometric axes
    input_tensor = tf.transpose(input_tensor, [0, 2, 1])
    input_tensor = tf.reshape(input_tensor, [s[0], total_hidden])
    s[-1] //= self.param_num()  # report the per-parameter shape
    return input_tensor, s
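# Hedged NumPy analogue (illustrative): flatten_dist_tensor groups per-element
# distribution parameters, moves the parameter axis in front of the flattened
# geometric axes, and re-flattens, so all first parameters come first, then
# all second parameters, and so on.
import numpy as np

batch, hidden, param_num = 2, 6, 3
t = np.arange(batch * hidden).reshape(batch, hidden)
t = t.reshape(batch, hidden // param_num, param_num)
t = np.transpose(t, (0, 2, 1))                 # parameter axis before elements
flat = t.reshape(batch, hidden)                # [all p0, all p1, all p2] per sample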
def coordinate_inv_transformer(input_layer, theta, name=PROVIDED):
    # init
    input_tensor = input_layer.tensor
    input_shape = tmf.get_shape(input_tensor)
    assert len(input_shape) == 3, "input tensor must be rank 3"
    if isinstance(theta, np.ndarray):
        theta = tf.constant(theta)
    elif not tmf.is_tf_data(theta):
        theta = theta.tensor
    keypoint_num = tmf.get_shape(input_tensor)[1]
    with tf.variable_scope(name):
        # append a homogeneous coordinate: [y, x] -> [y, x, 1]
        kp2_e = tf.concat(
            [input_tensor, tf.ones_like(input_tensor[:, :, :1])], axis=2)
        kp2_e = tf.expand_dims(kp2_e, axis=-1)
        transform_e = tf.tile(
            tf.expand_dims(theta, axis=1), [1, keypoint_num, 1, 1])
        kp1from2_e = tf.matmul(transform_e, kp2_e)
        kp1from2 = tf.squeeze(kp1from2_e, axis=-1)
    return kp1from2
def spatial_transformer_tps(input_layer, nx, ny, cp, out_size, fp_more=None, name=PROVIDED):
    # init
    input_shape = tmf.get_shape(input_layer.tensor)
    assert len(input_shape) == 4, "input tensor must be rank 4"

    def convert_to_tensor(a):
        if isinstance(a, np.ndarray):
            a = tf.constant(a)
        elif not tmf.is_tf_data(a):
            a = a.tensor
        return a

    cp = convert_to_tensor(cp)
    batch_size = tmf.get_shape(input_layer)[0]
    # apply transformer
    with tf.variable_scope(name):
        cp = tf.reshape(cp, [batch_size, -1, 2])
        output = TPS_STN(input_layer.tensor, nx, ny, cp,
                         out_size=out_size, fp_more=fp_more)
    # make output shape explicit
    output = tf.reshape(output, [input_shape[0]] + out_size + [input_shape[3]])
    return output
def heatmap_postprocess(self, heatmap):
    extra_outputs = dict()
    extra_outputs["heatmap_extra"] = dict()
    heatmap_ch = tmf.get_shape(heatmap)[3]
    expected_channels = self.options["keypoint_num"] + 1  # keypoints + background
    if heatmap_ch != expected_channels:
        extra_outputs["heatmap_extra"]["feature"] = heatmap
        if hasattr(self, "pt_defaults_scope_value"):
            pt_scope = pt.defaults_scope(**self.pt_defaults_scope_value())
        else:
            pt_scope = dummy_class_for_with()
        with pt_scope:
            # project to the expected channel count with a 1x1 convolution
            heatmap = pt.wrap(heatmap).conv2d(
                1, expected_channels, activation_fn=None)
    return heatmap, extra_outputs
def real_to_recon_code(r):
    p = real_to_gaussian2dparam(r)
    param_num = tmf.get_shape(p)[-1]
    tensor_list = list()
    tensor_list.append(p[:, :, :2])
    if param_num >= 4:
        tensor_list.append(
            tf.log(p[:, :, 2:4] / gaussian_2d_base_stddev + epsilon))
        if param_num == 5:
            # keep the raw (pre-tanh) correlation from the real-valued input
            tensor_list.append(r[:, :, 4:5])
    else:
        tensor_list.append(
            tf.log(p[:, :, 2:3] / gaussian_2d_base_stddev + epsilon))
    return tf.concat(tensor_list, axis=2)
def spatial_transformer(input_layer, theta, out_size, name=PROVIDED):
    # init
    input_shape = tmf.get_shape(input_layer.tensor)
    assert len(input_shape) == 4, "input tensor must be rank 4"
    if isinstance(theta, np.ndarray):
        theta = tf.constant(theta)
    elif not tmf.is_tf_data(theta):
        theta = theta.tensor
    # apply transformer
    output = transformer(input_layer.tensor, theta, out_size=out_size, name=name)
    # make output shape explicit
    output = tf.reshape(output, [input_shape[0]] + out_size + [input_shape[3]])
    return output
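# Hedged helper sketch (assumption: `theta` follows the usual spatial-transformer
# convention of one 2x3 row-major affine matrix per sample, which the code above
# does not spell out). A batch of identity transforms is a handy smoke test:
# warping with it should reproduce the input up to resampling.
import tensorflow as tf

def identity_theta(batch_size, dtype=tf.float32):
    # [[1, 0, 0], [0, 1, 0]] per sample -> [batch_size, 2, 3]
    eye = tf.constant([[1., 0., 0.], [0., 1., 0.]], dtype=dtype)
    return tf.tile(tf.expand_dims(eye, 0), [batch_size, 1, 1])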