def downblock(x, out_features, norm=False, kernel_size=4, pool=False, sn=False, test=False):
    """Discriminator down-sampling block.

    Applies an unpadded convolution (optionally spectrally normalized),
    optional instance normalization, LeakyReLU(0.2), and an optional
    2x2 average pooling.
    """
    # Spectral-norm wrapper for the convolution weight, only when requested.
    if sn:
        def weight_transform(w):
            return PF.spectral_norm(w, dim=0, test=test)
    else:
        weight_transform = None

    fan_in, fan_out = x.shape[1], out_features
    # PyTorch-style Kaiming-uniform bounds for weight and bias.
    lim_w = I.calc_normal_std_he_forward(
        fan_in, fan_out, kernel=(kernel_size, kernel_size)) / np.sqrt(2.)
    lim_b = I.calc_normal_std_he_forward(fan_in, fan_out) / np.sqrt(2.)

    h = PF.convolution(x, out_features,
                       kernel=(kernel_size, kernel_size),
                       pad=(0, 0), stride=(1, 1),
                       w_init=I.UniformInitializer((-lim_w, lim_w)),
                       b_init=I.UniformInitializer((-lim_b, lim_b)),
                       apply_w=weight_transform)
    if norm:
        h = PF.instance_normalization(h)
    h = F.leaky_relu(h, 0.2, inplace=True)
    if pool:
        h = F.average_pooling(h, kernel=(2, 2))
    return h
def detect_keypoint(x, block_expansion, num_kp, num_channels, max_features, num_blocks, temperature, estimate_jacobian=False, scale_factor=1, single_jacobian_map=False, pad=0, test=False, comm=None):
    """Detect keypoints (and optionally local jacobians) from an image.

    Returns a dict ``{"value": ...}`` with keypoint positions and, when
    ``estimate_jacobian`` is True, also ``{"jacobian": ...}`` with a 2x2
    matrix per keypoint.
    """
    # Optionally downsample the input with an anti-aliasing filter first.
    if scale_factor != 1:
        x = anti_alias_interpolate(x, num_channels, scale_factor)
    with nn.parameter_scope("hourglass"):
        feature_map = hourglass(x, block_expansion, num_blocks=num_blocks,
                                max_features=max_features, test=test, comm=comm)
    with nn.parameter_scope("keypoint_detector"):
        inmaps, outmaps = feature_map.shape[1], num_kp
        # PyTorch-style Kaiming-uniform bounds for the 7x7 prediction conv.
        k_w = I.calc_normal_std_he_forward(
            inmaps, outmaps, kernel=(7, 7)) / np.sqrt(2.)
        k_b = I.calc_normal_std_he_forward(inmaps, outmaps) / np.sqrt(2.)
        w_init = I.UniformInitializer((-k_w, k_w))
        b_init = I.UniformInitializer((-k_b, k_b))
        prediction = PF.convolution(feature_map, outmaps=num_kp,
                                    kernel=(7, 7), pad=(pad, pad),
                                    w_init=w_init, b_init=b_init)
    final_shape = prediction.shape
    # Softmax over all spatial positions turns each channel into a heatmap.
    heatmap = F.reshape(prediction, (final_shape[0], final_shape[1], -1))
    heatmap = F.softmax(heatmap / temperature, axis=2)
    heatmap = F.reshape(heatmap, final_shape, inplace=False)
    out = gaussian2kp(heatmap)  # {"value": value}, keypoint positions.
    if estimate_jacobian:
        if single_jacobian_map:
            num_jacobian_maps = 1
        else:
            num_jacobian_maps = num_kp
        with nn.parameter_scope("jacobian_estimator"):
            # Zero weights + [1, 0, 0, 1] bias: the predicted jacobian starts
            # as the identity matrix at every spatial location.
            jacobian_map = PF.convolution(feature_map,
                                          outmaps=4*num_jacobian_maps,
                                          kernel=(7, 7), pad=(pad, pad),
                                          w_init=I.ConstantInitializer(0),
                                          b_init=np.array([1, 0, 0, 1]*num_jacobian_maps))
        jacobian_map = F.reshape(
            jacobian_map, (final_shape[0], num_jacobian_maps, 4, final_shape[2], final_shape[3]))
        heatmap = F.reshape(
            heatmap, heatmap.shape[:2] + (1,) + heatmap.shape[2:], inplace=False)
        # Heatmap-weighted spatial average of the 4 jacobian entries.
        jacobian = heatmap * jacobian_map
        jacobian = F.sum(jacobian, axis=(3, 4))
        jacobian = F.reshape(
            jacobian, (jacobian.shape[0], jacobian.shape[1], 2, 2), inplace=False)
        out['jacobian'] = jacobian  # jacobian near each keypoint.
    # out is a dictionary containing {"value": value, "jacobian": jacobian}
    return out
def _create_variable(v, name, shape):
    # Create and initialize variables
    class Variable:
        pass

    parameter = v.type == "Parameter"
    variable_instance = None

    if parameter:
        # Map the protobuf initializer spec onto an nnabla initializer.
        # The lambda forms draw from the base initializer, then rescale the
        # sampled array by the configured multiplier.
        if v.initializer.type == 'Normal':
            initializer = NormalInitializer(v.initializer.multiplier)
        elif v.initializer.type == 'NormalAffineHe' or v.initializer.type == 'NormalAffineHeForward':
            # Affine weights are (inmaps, *outmaps): fan-in is shape[0].
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_forward(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalAffineHeBackward':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_backward(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalAffineGlorot':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_glorot(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalConvolutionHe' or v.initializer.type == 'NormalConvolutionHeForward':
            # Convolution weights are (outmaps, inmaps, *kernel): note the
            # swapped fan arguments relative to the affine cases.
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_forward(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalConvolutionHeBackward':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_backward(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalConvolutionGlorot':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_glorot(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'Uniform':
            initializer = UniformInitializer(
                lim=[-v.initializer.multiplier, v.initializer.multiplier])
        elif v.initializer.type == 'UniformAffineGlorot':
            initializer = (lambda shape: UniformInitializer(calc_uniform_lim_glorot(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'UniformConvolutionGlorot':
            initializer = (lambda shape: UniformInitializer(calc_uniform_lim_glorot(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'Constant':
            initializer = ConstantInitializer(value=v.initializer.multiplier)
        else:
            # Unknown initializer type: defer to the library default.
            initializer = None
        variable_instance = get_parameter_or_create(name, shape, initializer)
    else:
        # create empty variable, memory will be allocated in network.setup()
        # after network optimization
        variable_instance = nn.Variable()

    # Lightweight record bundling the nnabla variable with its metadata.
    variable = Variable()
    variable.name = name
    variable.parameter = parameter
    variable.shape = shape
    variable.variable_instance = variable_instance
    return variable
def _create_variable(v, name, shape):
    # Create and initialize variables
    class Variable:
        pass

    parameter = v.type == "Parameter"
    variable_instance = None

    if parameter:
        # Translate the declared initializer spec into an nnabla initializer.
        # Lambda variants sample from the base initializer and rescale the
        # resulting array by the configured multiplier.
        if v.initializer.type == 'Normal':
            initializer = NormalInitializer(v.initializer.multiplier)
        elif v.initializer.type == 'NormalAffineHe' or v.initializer.type == 'NormalAffineHeForward':
            # Affine weight shape is (inmaps, *outmaps): fan-in is shape[0].
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_forward(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalAffineHeBackward':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_backward(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalAffineGlorot':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_glorot(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalConvolutionHe' or v.initializer.type == 'NormalConvolutionHeForward':
            # Convolution weight shape is (outmaps, inmaps, *kernel): fan
            # arguments are swapped relative to the affine cases.
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_forward(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalConvolutionHeBackward':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_backward(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalConvolutionGlorot':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_glorot(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'Uniform':
            initializer = UniformInitializer(
                lim=[-v.initializer.multiplier, v.initializer.multiplier])
        elif v.initializer.type == 'UniformAffineGlorot':
            initializer = (lambda shape: UniformInitializer(calc_uniform_lim_glorot(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'UniformConvolutionGlorot':
            initializer = (lambda shape: UniformInitializer(calc_uniform_lim_glorot(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'Constant':
            initializer = ConstantInitializer(value=v.initializer.multiplier)
        else:
            # Unknown initializer type: defer to the library default.
            initializer = None
        variable_instance = get_parameter_or_create(name, shape, initializer)
    else:
        # create empty variable, memory will be allocated in network.setup()
        # after network optimization
        variable_instance = nn.Variable()

    # Lightweight record bundling the nnabla variable with its metadata.
    variable = Variable()
    variable.name = name
    variable.parameter = parameter
    variable.shape = shape
    variable.variable_instance = variable_instance
    return variable
def resblock(x, in_features: int, kernel_size: int, padding: int, test: bool = False, comm=None):
    """Pre-activation residual block: two (BN -> ReLU -> conv) stages + skip.

    The number of channels is preserved (``in_features`` in and out) so the
    identity shortcut can be added directly.
    """
    # Pick the batch-norm flavor once; multi-GPU runs synchronize statistics
    # across workers, single-GPU uses plain batch normalization.
    if comm:
        batchnorm = functools.partial(PF.sync_batch_normalization,
                                      comm=comm, group='world', axes=[1],
                                      decay_rate=0.9, eps=1e-05,
                                      batch_stat=not test)
    else:  # 1 GPU
        batchnorm = functools.partial(PF.batch_normalization, axes=[1],
                                      decay_rate=0.9, eps=1e-05,
                                      batch_stat=not test)

    fan_in, fan_out = x.shape[1], in_features
    # PyTorch-style Kaiming-uniform bounds, shared by both convolutions.
    lim_w = I.calc_normal_std_he_forward(
        fan_in, fan_out, kernel=(kernel_size, kernel_size)) / np.sqrt(2.)
    lim_b = I.calc_normal_std_he_forward(fan_in, fan_out) / np.sqrt(2.)
    w_init = I.UniformInitializer((-lim_w, lim_w))
    b_init = I.UniformInitializer((-lim_b, lim_b))

    with nn.parameter_scope("convblock_0"):
        h = batchnorm(x)
        h = F.relu(h, inplace=True)
        h = PF.convolution(h, outmaps=in_features,
                           kernel=(kernel_size, kernel_size),
                           pad=(padding, padding),
                           w_init=w_init, b_init=b_init)
    with nn.parameter_scope("convblock_2"):
        h = batchnorm(h)
        h = F.relu(h, inplace=True)
        h = PF.convolution(h, outmaps=in_features,
                           kernel=(kernel_size, kernel_size),
                           pad=(padding, padding),
                           w_init=w_init, b_init=b_init)
    # Residual connection.
    return F.add2(h, x, inplace=True)
def conv(inp, outmaps, kernel, pad=None, stride=None, dilation=None, group=1,
         w_init=None, b_init=None, base_axis=1, fix_parameters=False, rng=None,
         with_bias=True, use_wscale=True, use_he_backward=False):
    """Convolution with optional equalized learning rate (wscale).

    When ``w_init`` is None and ``use_wscale`` is True, the weight is drawn
    from N(0, 1) and rescaled at graph-construction time by the He std
    (equalized learning rate). When ``use_wscale`` is False, the He std is
    baked into the initializer instead. A caller-supplied ``w_init`` is used
    as-is.

    Args:
        inp: Input variable.
        outmaps (int): Number of output channels.
        kernel (tuple): Convolution kernel size.
        use_he_backward (bool): Use fan-out (backward) He std instead of
            fan-in (forward).
    Returns:
        Output variable of F.convolution.
    """
    # He std from fan-in (forward) or fan-out (backward).
    if use_he_backward:
        std = calc_normal_std_he_backward(inp.shape[base_axis], outmaps, kernel=kernel)
    else:
        std = calc_normal_std_he_forward(inp.shape[base_axis], outmaps, kernel=kernel)

    # BUGFIX: use integer floor division. The original `/ group` produced a
    # float dimension in the weight shape tuple under Python 3.
    w_shape = (outmaps, inp.shape[base_axis] // group) + tuple(kernel)

    if w_init is None:
        if use_wscale:
            # Equalized learning rate: unit-variance init, scaled by std here.
            w_init = NormalInitializer(1.)
            w = get_parameter_or_create("W", w_shape, w_init, not fix_parameters)
            w *= std
        else:
            w_init = NormalInitializer(std)
            w = get_parameter_or_create("W", w_shape, w_init, not fix_parameters)
    else:
        # Caller supplied an initializer. (The original branch contained an
        # unreachable glorot-uniform fallback guarded by `w_init is None`,
        # which can never hold here; it has been removed.)
        w = get_parameter_or_create("W", w_shape, w_init, not fix_parameters)

    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    b = None
    if with_bias:
        b = get_parameter_or_create("b", (outmaps, ), b_init, not fix_parameters)

    return F.convolution(inp, w, b, base_axis, pad, stride, dilation, group)
def discriminator(x, kp=None, num_channels=3, block_expansion=64, num_blocks=4, max_features=512, sn=False, use_kp=False, num_kp=10, kp_variance=0.01, test=False, **kwargs):
    """Patch discriminator built from a stack of downblocks.

    Returns ``(feature_maps, prediction_map)`` where ``feature_maps`` holds
    the output of every downblock (for feature-matching losses) and
    ``prediction_map`` is a 1-channel 1x1-conv patch score map.
    """
    h = x
    if use_kp:
        # Append per-keypoint gaussian heatmaps as extra input channels.
        heatmap = kp2gaussian(kp, x.shape[2:], kp_variance)
        h = F.concatenate(h, heatmap, axis=1)

    feature_maps = []
    for i in range(num_blocks):
        with nn.parameter_scope(f"downblock_{i}"):
            h = downblock(h,
                          out_features=min(max_features,
                                           block_expansion * (2 ** (i + 1))),
                          norm=(i != 0), kernel_size=4,
                          pool=(i != num_blocks - 1), sn=sn, test=test)
        feature_maps.append(h)
    out = feature_maps[-1]

    # Optional spectral normalization on the final prediction conv.
    if sn:
        def weight_transform(w):
            return PF.spectral_norm(w, dim=0, test=test)
    else:
        weight_transform = None

    with nn.parameter_scope("prediction"):
        fan_in, fan_out = out.shape[1], 1
        # PyTorch-style Kaiming-uniform bounds for the 1x1 conv.
        lim_w = I.calc_normal_std_he_forward(
            fan_in, fan_out, kernel=(1, 1)) / np.sqrt(2.)
        lim_b = I.calc_normal_std_he_forward(fan_in, fan_out) / np.sqrt(2.)
        prediction_map = PF.convolution(out, 1, kernel=(1, 1),
                                        pad=(0, 0), stride=(1, 1),
                                        w_init=I.UniformInitializer((-lim_w, lim_w)),
                                        b_init=I.UniformInitializer((-lim_b, lim_b)),
                                        apply_w=weight_transform)
    return feature_maps, prediction_map
def downblock(x, out_features, kernel_size=3, padding=1, groups=1, test=False, comm=None):
    """Generator down-sampling block: conv -> batch norm -> ReLU -> 2x2 avg-pool."""
    # Batch-norm flavor: synchronized statistics when running multi-GPU.
    if comm:
        norm = functools.partial(PF.sync_batch_normalization,
                                 comm=comm, group='world', axes=[1],
                                 decay_rate=0.9, eps=1e-05,
                                 batch_stat=not test)
    else:  # 1 GPU
        norm = functools.partial(PF.batch_normalization, axes=[1],
                                 decay_rate=0.9, eps=1e-05,
                                 batch_stat=not test)

    fan_in, fan_out = x.shape[1], out_features
    # PyTorch-style Kaiming-uniform bounds for the convolution parameters.
    lim_w = I.calc_normal_std_he_forward(
        fan_in, fan_out, kernel=(kernel_size, kernel_size)) / np.sqrt(2.)
    lim_b = I.calc_normal_std_he_forward(fan_in, fan_out) / np.sqrt(2.)

    with nn.parameter_scope("downblock"):
        h = PF.convolution(x, outmaps=out_features,
                           kernel=(kernel_size, kernel_size),
                           pad=(padding, padding), group=groups,
                           w_init=I.UniformInitializer((-lim_w, lim_w)),
                           b_init=I.UniformInitializer((-lim_b, lim_b)))
        h = norm(h)
        h = F.relu(h, inplace=True)
        h = F.average_pooling(h, kernel=(2, 2))
    return h
def affine(inp, n_outmaps, base_axis=1, w_init=None, b_init=None, fix_parameters=False, rng=None, with_bias=True, use_wscale=True, use_he_backward=False):
    """Affine (fully connected) layer with optional equalized learning rate.

    When ``w_init`` is None and ``use_wscale`` is True, the weight is drawn
    from N(0, 1) and rescaled at graph-construction time by the He std
    (equalized learning rate). When ``use_wscale`` is False, the He std is
    baked into the initializer instead. A caller-supplied ``w_init`` is used
    as-is.

    Args:
        inp: Input variable.
        n_outmaps (int or iterable of int): Output map shape.
        use_he_backward (bool): Use fan-out (backward) He std instead of
            fan-in (forward).
    Returns:
        Output variable of F.affine.
    """
    # Normalize n_outmaps to a list and compute the flat output size.
    if not hasattr(n_outmaps, '__iter__'):
        n_outmaps = [n_outmaps]
    n_outmaps = list(n_outmaps)
    n_outmap = int(np.prod(n_outmaps))

    # He std from fan-in (forward) or fan-out (backward).
    if use_he_backward:
        std = calc_normal_std_he_backward(inp.shape[base_axis], n_outmap)
    else:
        std = calc_normal_std_he_forward(inp.shape[base_axis], n_outmap)

    w_shape = [int(np.prod(inp.shape[base_axis:]))] + n_outmaps
    if w_init is None:
        if use_wscale:
            # Equalized learning rate: unit-variance init, scaled by std here.
            w_init = NormalInitializer(1.)
            w = get_parameter_or_create("W", w_shape, w_init, not fix_parameters)
            w *= std
        else:
            w_init = NormalInitializer(std)
            w = get_parameter_or_create("W", w_shape, w_init, not fix_parameters)
    else:
        # Caller supplied an initializer. (The original branch contained an
        # unreachable glorot-uniform fallback guarded by `w_init is None`,
        # which can never hold here; it has been removed.)
        w = get_parameter_or_create("W", w_shape, w_init, not fix_parameters)

    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    b = None
    if with_bias:
        b = get_parameter_or_create("b", n_outmaps, b_init, not fix_parameters)

    return F.affine(inp, w, b, base_axis)
def pf_convolution(x, ochannels, kernel, stride=(1, 1), group=1, channel_last=False, with_bias=False):
    """'Same'-padded convolution with He-forward normal weight init.

    Infers the input channel count from the channel axis (which depends on
    ``channel_last``) and pads so that odd kernels preserve spatial size.
    """
    channel_axis = get_channel_axis(channel_last)
    ichannels = x.shape[channel_axis]
    w_init = I.NormalInitializer(
        sigma=I.calc_normal_std_he_forward(ichannels, ochannels, kernel=kernel),
        rng=RNG)
    # Half-kernel padding per spatial dimension ('same' for odd kernels).
    same_pad = tuple((k - 1) // 2 for k in kernel)
    return PF.convolution(x, ochannels, kernel,
                          stride=stride, pad=same_pad, group=group,
                          with_bias=with_bias, channel_last=channel_last,
                          w_init=w_init)
def conv(x, c, name, kernel=(3, 3), pad=(1, 1), stride=(1, 1), zeroing_w=False):
    """Named convolution whose weight and bias are uniformly initialized
    (to match the reference PyTorch initialization), or zero-initialized
    when ``zeroing_w`` is set."""
    if zeroing_w:
        # Zero both weight and bias (e.g. for layers that should start inert).
        w_init = I.ConstantInitializer(0)
        b_init = I.ConstantInitializer(0)
    else:
        # Uniform bound derived from the He-forward std; note the doubled
        # input-channel count in the fan-in term.
        bound = I.calc_normal_std_he_forward(x.shape[1] * 2, c, tuple(kernel))
        w_init = I.UniformInitializer(lim=(-bound, bound), rng=None)
        b_init = I.UniformInitializer(lim=(-bound, bound), rng=None)
    return PF.convolution(x, c, kernel, pad=pad, stride=stride,
                          name=name, w_init=w_init, b_init=b_init)
def predict_dense_motion(source_image, kp_driving, kp_source, block_expansion, num_blocks, max_features, num_kp, num_channels, estimate_occlusion_map=False, scale_factor=1, kp_variance=0.01, test=False, comm=None):
    """Predict a dense motion field from sparse keypoint motions.

    Returns a dict with:
      - 'sparse_deformed': source image warped by each sparse motion,
      - 'mask': softmax weights over the num_kp + 1 motions (incl. background),
      - 'deformation': the mask-weighted dense deformation grid,
      - 'occlusion_map' (optional): sigmoid occlusion estimate.
    """
    # Optionally downsample the source with an anti-aliasing filter first.
    if scale_factor != 1:
        source_image = anti_alias_interpolate(source_image, num_channels, scale_factor)
    bs, _, h, w = source_image.shape
    out_dict = dict()
    heatmap_representation = create_heatmap_representations(
        source_image, kp_driving, kp_source, kp_variance)
    sparse_motion = create_sparse_motions(source_image, kp_driving, kp_source, num_kp)
    deformed_source = create_deformed_source_image(source_image, sparse_motion, num_kp)
    out_dict['sparse_deformed'] = deformed_source
    # NOTE(review): `input` shadows the builtin; kept as-is here.
    input = F.concatenate(heatmap_representation, deformed_source, axis=2)
    input = F.reshape(input, (bs, -1, h, w))
    with nn.parameter_scope("hourglass"):
        prediction = hourglass(input, block_expansion=block_expansion,
                               num_blocks=num_blocks, max_features=max_features,
                               test=test, comm=comm)
    with nn.parameter_scope("mask"):
        inmaps, outmaps = prediction.shape[1], num_kp + 1
        # PyTorch-style Kaiming-uniform bounds for the 7x7 mask conv.
        k_w = I.calc_normal_std_he_forward(inmaps, outmaps,
                                           kernel=(7, 7)) / np.sqrt(2.)
        k_b = I.calc_normal_std_he_forward(inmaps, outmaps) / np.sqrt(2.)
        w_init = I.UniformInitializer((-k_w, k_w))
        b_init = I.UniformInitializer((-k_b, k_b))
        mask = PF.convolution(prediction, outmaps=num_kp + 1,
                              kernel=(7, 7), pad=(3, 3),
                              w_init=w_init, b_init=b_init)
    # Softmax over the num_kp + 1 motion channels.
    mask = F.softmax(mask, axis=1)
    out_dict['mask'] = mask
    reshaped_mask = F.reshape(mask, mask.shape[:2] + (1, ) + mask.shape[2:],
                              inplace=False)
    sparse_motion = F.transpose(sparse_motion, (0, 1, 4, 2, 3))
    # Mask-weighted combination of the sparse motions -> dense deformation.
    deformation = F.sum(sparse_motion * reshaped_mask, axis=1)
    deformation = F.transpose(deformation, (0, 2, 3, 1))
    out_dict['deformation'] = deformation
    if estimate_occlusion_map:
        with nn.parameter_scope("occlusion_map"):
            # Reuses the uniform bounds computed for the mask head.
            occlusion_map = F.sigmoid(
                PF.convolution(prediction, outmaps=1, kernel=(7, 7),
                               pad=(3, 3), w_init=w_init, b_init=b_init))
        out_dict['occlusion_map'] = occlusion_map
    else:
        occlusion_map = None
    return out_dict
def occlusion_aware_generator(source_image, kp_driving, kp_source, num_channels, num_kp, block_expansion, max_features, num_down_blocks, num_bottleneck_blocks, estimate_occlusion_map=False, dense_motion_params=None, estimate_jacobian=False, test=False, comm=None):
    """Generate an image by warping source features with a predicted dense
    motion field (optionally masked by an occlusion map).

    Returns a dict with at least 'prediction' and, depending on flags, also
    'mask', 'sparse_deformed', 'occlusion_map', and 'deformed'.
    """
    # pre-downsampling
    out = sameblock(source_image, out_features=block_expansion,
                    kernel_size=7, padding=3, test=test, comm=comm)

    # downsampling
    for i in range(num_down_blocks):
        with nn.parameter_scope(f"downblock_{i}"):
            out_features = min(max_features, block_expansion * (2 ** (i + 1)))
            out = downblock(out, out_features=out_features,
                            kernel_size=3, padding=1, test=test, comm=comm)

    output_dict = {}
    if dense_motion_params is not None:
        with nn.parameter_scope("dense_motion_prediction"):
            dense_motion = predict_dense_motion(
                source_image=source_image,
                kp_driving=kp_driving, kp_source=kp_source,
                num_kp=num_kp, num_channels=num_channels,
                estimate_occlusion_map=estimate_occlusion_map,
                test=test, comm=comm, **dense_motion_params)
        # dense_motion is a dictionary containing:
        # 'sparse_deformed': <Variable((8, 11, 3, 256, 256)),
        # 'mask': <Variable((8, 11, 256, 256)),
        # 'deformation': <Variable((8, 256, 256, 2)),
        # 'occlusion_map': <Variable((8, 1, 256, 256))}
        output_dict['mask'] = dense_motion['mask']
        output_dict['sparse_deformed'] = dense_motion['sparse_deformed']

        # Transform feature representation by deformation (+ occlusion)
        if 'occlusion_map' in dense_motion:
            occlusion_map = dense_motion['occlusion_map']
            output_dict['occlusion_map'] = occlusion_map
        else:
            occlusion_map = None
        deformation = dense_motion['deformation']
        out = deform_input(out, deformation)

        if occlusion_map is not None:
            # Occlusion map may be at a different resolution than the
            # down-sampled features; resize it to match before masking.
            if out.shape[2] != occlusion_map.shape[2] or out.shape[3] != occlusion_map.shape[3]:
                resized_occlusion_map = F.interpolate(occlusion_map,
                                                      output_size=out.shape[2:],
                                                      mode="linear",
                                                      align_corners=False,
                                                      half_pixel=True)
            else:
                resized_occlusion_map = F.identity(occlusion_map)
            out = out * resized_occlusion_map

        if test:
            # Also expose the raw warped source image for visualization.
            output_dict["deformed"] = deform_input(source_image, deformation)

    # intermediate residual blocks
    in_features = min(max_features, block_expansion * (2 ** num_down_blocks))
    for i in range(num_bottleneck_blocks):
        with nn.parameter_scope(f"residual_block_{i}"):
            out = resblock(out, in_features=in_features,
                           kernel_size=3, padding=1, test=test, comm=comm)

    # upsampling
    for i in range(num_down_blocks):
        with nn.parameter_scope(f"upblock_{i}"):
            out_features = min(max_features,
                               block_expansion * (2 ** (num_down_blocks - i - 1)))
            out = upblock(out, out_features=out_features,
                          kernel_size=3, padding=1, test=test, comm=comm)

    with nn.parameter_scope("final_conv"):
        inmaps, outmaps = out.shape[1], num_channels
        # PyTorch-style Kaiming-uniform bounds for the 7x7 output conv.
        k_w = I.calc_normal_std_he_forward(
            inmaps, outmaps, kernel=(7, 7)) / np.sqrt(2.)
        k_b = I.calc_normal_std_he_forward(inmaps, outmaps) / np.sqrt(2.)
        w_init = I.UniformInitializer((-k_w, k_w))
        b_init = I.UniformInitializer((-k_b, k_b))
        out = PF.convolution(out, outmaps=num_channels, kernel=(7, 7),
                             pad=(3, 3), w_init=w_init, b_init=b_init)
    # Map to [0, 1] image range.
    out = F.sigmoid(out)
    output_dict["prediction"] = out
    return output_dict
def affine_act(self, x, dims, name):
    """Affine layer with He-forward normal weight init, followed by the
    instance's activation function ``self.act``."""
    in_channels = x.shape[1]
    std = I.calc_normal_std_he_forward(in_channels, dims)
    h = PF.affine(x, dims, w_init=I.NormalInitializer(std, ), name=name)
    return self.act(h)