def get_image_pyramid(x, scales, num_channels):
    """Return rescaled versions of ``x``, one per entry in ``scales``.

    Every scale is passed to ``anti_alias_interpolate(x, num_channels, scale)``
    and the result is stored under the key ``'prediction_' + <scale>``, where
    ``<scale>`` is ``str(scale)`` with every ``'-'`` replaced by ``'.'``.

    Args:
        x: Input forwarded unchanged to ``anti_alias_interpolate``.
        scales: Iterable of scale factors to generate.
        num_channels: Forwarded to ``anti_alias_interpolate``.

    Returns:
        dict: key -> interpolated output, in the iteration order of ``scales``.
    """
    def _key(scale):
        # Key format must stay stable: consumers look entries up by name.
        return 'prediction_' + str(scale).replace('-', '.')

    return {
        _key(scale): anti_alias_interpolate(x, num_channels, scale)
        for scale in scales
    }
def detect_keypoint(x, block_expansion, num_kp, num_channels, max_features,
                    num_blocks, temperature, estimate_jacobian=False,
                    scale_factor=1, single_jacobian_map=False, pad=0,
                    test=False, comm=None):
    """Predict keypoints (and optionally per-keypoint jacobians) from ``x``.

    Pipeline, as implemented below:
      1. If ``scale_factor != 1``, ``x`` is first resampled with
         ``anti_alias_interpolate``.
      2. ``hourglass`` extracts a feature map (parameter scope "hourglass").
      3. A 7x7 convolution with ``num_kp`` output maps produces one score map
         per keypoint (parameter scope "keypoint_detector").
      4. Each score map is flattened, divided by ``temperature``, passed
         through a softmax and reshaped back to the convolution output shape.
      5. ``gaussian2kp`` converts the heatmaps into the output dictionary.
      6. If ``estimate_jacobian`` is set, a second 7x7 convolution (parameter
         scope "jacobian_estimator") yields 4 values per location, which are
         heatmap-weighted, summed over the two trailing axes and stored as
         2x2 matrices under ``out['jacobian']``.

    Args:
        x: Input variable; forwarded to ``hourglass``.
        block_expansion, num_blocks, max_features: Forwarded to ``hourglass``.
        num_kp: Number of keypoint heatmaps to predict.
        num_channels: Forwarded to ``anti_alias_interpolate`` when rescaling.
        temperature: Divisor applied to the scores before the softmax.
        estimate_jacobian: Whether to add the ``'jacobian'`` entry.
        scale_factor: Rescaling factor for ``x``; 1 disables rescaling.
        single_jacobian_map: Use one jacobian map instead of ``num_kp`` maps.
        pad: Padding applied on both axes of the two 7x7 convolutions.
        test, comm: Forwarded to ``hourglass``.

    Returns:
        The dictionary returned by ``gaussian2kp`` (per the original comment,
        ``{"value": value}`` holding keypoint positions), extended with
        ``"jacobian"`` when ``estimate_jacobian`` is true.
    """
    kernel = (7, 7)
    padding = (pad, pad)

    if scale_factor != 1:
        x = anti_alias_interpolate(x, num_channels, scale_factor)

    with nn.parameter_scope("hourglass"):
        features = hourglass(x, block_expansion, num_blocks=num_blocks,
                             max_features=max_features, test=test, comm=comm)

    with nn.parameter_scope("keypoint_detector"):
        in_channels = features.shape[1]
        # Uniform bounds: He-forward standard deviation scaled by 1/sqrt(2).
        w_bound = I.calc_normal_std_he_forward(
            in_channels, num_kp, kernel=kernel) / np.sqrt(2.)
        b_bound = I.calc_normal_std_he_forward(
            in_channels, num_kp) / np.sqrt(2.)
        weight_init = I.UniformInitializer((-w_bound, w_bound))
        bias_init = I.UniformInitializer((-b_bound, b_bound))
        scores = PF.convolution(features, outmaps=num_kp, kernel=kernel,
                                pad=padding, w_init=weight_init,
                                b_init=bias_init)

    map_shape = scores.shape

    # Softmax over all spatial positions of each keypoint map.
    flat_scores = F.reshape(scores, (map_shape[0], map_shape[1], -1))
    flat_probs = F.softmax(flat_scores / temperature, axis=2)
    heatmap = F.reshape(flat_probs, map_shape, inplace=False)

    # {"value": value}, keypoint positions.
    out = gaussian2kp(heatmap)

    if not estimate_jacobian:
        return out

    num_jacobian_maps = 1 if single_jacobian_map else num_kp

    with nn.parameter_scope("jacobian_estimator"):
        # Zero weights with bias [1, 0, 0, 1] per map: the flattened 2x2
        # identity pattern.
        raw_jacobian = PF.convolution(
            features, outmaps=4 * num_jacobian_maps, kernel=kernel,
            pad=padding, w_init=I.ConstantInitializer(0),
            b_init=np.array([1, 0, 0, 1] * num_jacobian_maps))

    jacobian_map = F.reshape(
        raw_jacobian,
        (map_shape[0], num_jacobian_maps, 4, map_shape[2], map_shape[3]))

    # Insert a singleton axis so the heatmap broadcasts over the 4 entries.
    weights = F.reshape(
        heatmap, heatmap.shape[:2] + (1,) + heatmap.shape[2:], inplace=False)

    pooled = F.sum(weights * jacobian_map, axis=(3, 4))
    # jacobian near each keypoint.
    out['jacobian'] = F.reshape(
        pooled, (pooled.shape[0], pooled.shape[1], 2, 2), inplace=False)

    # out is a dictionary containing {"value": value, "jacobian": jacobian}
    return out
def predict_dense_motion(source_image, kp_driving, kp_source,
                         block_expansion, num_blocks, max_features,
                         num_kp, num_channels, estimate_occlusion_map=False,
                         scale_factor=1, kp_variance=0.01,
                         test=False, comm=None):
    """Predict a dense deformation (and optional occlusion map) from keypoints.

    Steps, as implemented below:
      1. If ``scale_factor != 1``, ``source_image`` is resampled with
         ``anti_alias_interpolate``.
      2. Heatmap representations, sparse motions and deformed copies of the
         source image are built by the ``create_*`` helpers.
      3. Heatmaps and deformed sources are concatenated along axis 2, folded
         into a ``(bs, -1, h, w)`` tensor and fed to ``hourglass``
         (parameter scope "hourglass").
      4. A 7x7 convolution with ``num_kp + 1`` output maps followed by a
         softmax over axis 1 gives the mask (parameter scope "mask").
      5. The sparse motions are weighted by the mask and summed over axis 1
         to form the deformation, returned with its channel axis last.
      6. If ``estimate_occlusion_map`` is set, a 7x7 convolution with one
         output map followed by a sigmoid gives the occlusion map
         (parameter scope "occlusion_map").

    Args:
        source_image: 4-D variable unpacked as ``(bs, _, h, w)``.
        kp_driving, kp_source: Keypoint inputs forwarded to the ``create_*``
            helpers.
        block_expansion, num_blocks, max_features: Forwarded to ``hourglass``.
        num_kp: Number of keypoints; the mask has ``num_kp + 1`` channels.
        num_channels: Forwarded to ``anti_alias_interpolate`` when rescaling.
        estimate_occlusion_map: Whether to add the ``'occlusion_map'`` entry.
        scale_factor: Rescaling factor for ``source_image``; 1 disables it.
        kp_variance: Forwarded to ``create_heatmap_representations``.
        test, comm: Forwarded to ``hourglass``.

    Returns:
        dict with keys ``'sparse_deformed'``, ``'mask'``, ``'deformation'``
        and, when requested, ``'occlusion_map'``.
    """
    if scale_factor != 1:
        source_image = anti_alias_interpolate(
            source_image, num_channels, scale_factor)

    bs, _, h, w = source_image.shape

    out_dict = {}
    heatmap_representation = create_heatmap_representations(
        source_image, kp_driving, kp_source, kp_variance)
    sparse_motion = create_sparse_motions(
        source_image, kp_driving, kp_source, num_kp)
    deformed_source = create_deformed_source_image(
        source_image, sparse_motion, num_kp)
    out_dict['sparse_deformed'] = deformed_source

    # Named ``hourglass_input`` rather than ``input`` so the builtin is not
    # shadowed.
    hourglass_input = F.concatenate(
        heatmap_representation, deformed_source, axis=2)
    hourglass_input = F.reshape(hourglass_input, (bs, -1, h, w))

    with nn.parameter_scope("hourglass"):
        prediction = hourglass(hourglass_input,
                               block_expansion=block_expansion,
                               num_blocks=num_blocks,
                               max_features=max_features,
                               test=test, comm=comm)

    with nn.parameter_scope("mask"):
        inmaps, outmaps = prediction.shape[1], num_kp + 1
        # Uniform bounds: He-forward standard deviation scaled by 1/sqrt(2).
        k_w = I.calc_normal_std_he_forward(
            inmaps, outmaps, kernel=(7, 7)) / np.sqrt(2.)
        k_b = I.calc_normal_std_he_forward(inmaps, outmaps) / np.sqrt(2.)
        w_init = I.UniformInitializer((-k_w, k_w))
        b_init = I.UniformInitializer((-k_b, k_b))
        mask = PF.convolution(prediction, outmaps=outmaps, kernel=(7, 7),
                              pad=(3, 3), w_init=w_init, b_init=b_init)

    mask = F.softmax(mask, axis=1)
    out_dict['mask'] = mask

    # Insert a singleton axis so the mask broadcasts against the transposed
    # sparse motions.
    reshaped_mask = F.reshape(
        mask, mask.shape[:2] + (1,) + mask.shape[2:], inplace=False)
    sparse_motion = F.transpose(sparse_motion, (0, 1, 4, 2, 3))
    deformation = F.sum(sparse_motion * reshaped_mask, axis=1)
    deformation = F.transpose(deformation, (0, 2, 3, 1))
    out_dict['deformation'] = deformation

    if estimate_occlusion_map:
        with nn.parameter_scope("occlusion_map"):
            # NOTE(review): this reuses the initialisers computed for the
            # mask convolution (outmaps = num_kp + 1) although this
            # convolution has a single output map. Kept as-is to preserve
            # the existing initialisation; confirm whether it is intended.
            occlusion_map = F.sigmoid(
                PF.convolution(prediction, outmaps=1, kernel=(7, 7),
                               pad=(3, 3), w_init=w_init, b_init=b_init))
        out_dict['occlusion_map'] = occlusion_map

    return out_dict