def __call__(self, features):
    """Fuse a sequence of feature maps and predict per-class scores.

    Every entry of ``features`` is resized to the trailing ``shape[2:]`` of
    ``features[0]``; the resized maps are concatenated along axis 1, passed
    through a 1x1 conv + batch norm + ReLU and a second 1x1 conv with
    ``hparams['num_classes']`` outputs, then resized to ``self.output_size``.

    Args:
        features: Indexable sequence of variables (assumed channel-first
            feature maps sharing one batch size -- TODO confirm).

    Returns:
        The resized scores; a softmax over axis 1 is applied first when
        ``self.test`` is truthy.
    """
    def _resize(var, size):
        # All resizes in this head share the same interpolation settings.
        return F.interpolate(var, output_size=size, mode='linear',
                             align_corners=False, half_pixel=True)

    same_size = []
    for feat in features:
        same_size.append(_resize(feat, features[0].shape[2:]))
    fused = F.concatenate(*same_size, axis=1)

    hidden = self.conv2d(fused, self.hparams['channels'], kernel_size=1,
                         stride=1, bias=False, name='convs/0/conv')
    hidden = F.relu(self.batch_norm(hidden, name='convs/0/bn'))
    scores = self.conv2d(hidden, self.hparams['num_classes'], kernel_size=1,
                         stride=1, bias=True, name='conv_seg')
    scores = _resize(scores, self.output_size)

    return F.softmax(scores, axis=1) if self.test else scores
def layer5_1(x):
    """Apply two identical pad/conv/norm/PReLU/2x-upsample stages to ``x``.

    The first stage produces 256 maps, the second 64. Each stage reflect-pads
    by one pixel per side, applies a 3x3 convolution, instance normalization
    without affine terms (``gamma=None, beta=None``), a PReLU and a
    nearest-neighbour interpolation with scale (2, 2).
    """
    def _stage(h, maps, conv_name, prelu_name):
        h = F.pad(h, (1, 1, 1, 1), 'reflect')
        h = PF.convolution(h, maps, kernel=(3, 3), stride=(1, 1),
                           name=conv_name)
        h = F.instance_normalization(h, gamma=None, beta=None, channel_axis=1)
        h = PF.prelu(h, name=prelu_name)
        return F.interpolate(h, scale=(2, 2), mode='nearest',
                             align_corners=False)

    first = _stage(x, 256, 'layer5_1.1', 'layer5_1.3')
    return _stage(first, 64, 'layer5_1.6', 'layer5_1.8')
def rrdb_net(x, num_output_channel, num_rrdb_blocks, growth_channel=32):
    """Build the RRDB trunk followed by two 2x upsampling stages.

    :param x: input image variable
    :param num_output_channel: number of maps used by every convolution
        except the last one
    :param num_rrdb_blocks: how many ``rrdb`` blocks are chained in the trunk
    :param growth_channel: forwarded to ``rrdb`` as its growth channel
    :return: output of the final 3-map convolution ``conv_last``
    """
    def conv3x3(inp, maps, name):
        return PF.convolution(inp, maps, kernel=(3, 3), stride=(1, 1),
                              pad=(1, 1), name=name)

    def up2x(inp, name):
        # nearest-neighbour 2x resize -> 3x3 conv -> leaky ReLU(0.2)
        enlarged = F.interpolate(inp, scale=(2, 2), mode='nearest')
        return F.leaky_relu(conv3x3(enlarged, num_output_channel, name),
                            alpha=0.2)

    shallow = conv3x3(x, num_output_channel, 'conv_first')

    trunk = shallow
    with nn.parameter_scope('RRDB_trunk'):
        for idx in range(num_rrdb_blocks):
            with nn.parameter_scope('{}'.format(idx)):
                trunk = rrdb(trunk, num_output_channel, growth_channel)
    trunk = conv3x3(trunk, num_output_channel, 'trunk_conv')

    # Long skip connection around the whole trunk.
    merged = shallow + trunk

    upscaled = up2x(up2x(merged, 'upconv1'), 'upconv2')
    refined = F.leaky_relu(conv3x3(upscaled, num_output_channel, 'HRconv'),
                           alpha=0.2)
    return conv3x3(refined, 3, 'conv_last')
def interpolate_nn(image, frame, scale):
    '''
    Resize ``image`` and ``frame`` with the same ``scale``.

    ``scale`` is handed to ``F.interpolate`` as its second positional
    argument and no mode is given, so the library's default mode applies
    (the original note on this function describes it as linear).

    Args:
        image : image (Variable)
        frame : frame (Variable)
        scale : scale factors forwarded unchanged to ``F.interpolate``
    Returns:
        tuple ``(image, frame)`` of the interpolated variables
    '''
    return tuple(F.interpolate(var, scale) for var in (image, frame))
def backward_impl(self, inputs, outputs, prop_down, accum):
    """Propagate the gradient of the interpolate data-grad function.

    inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
    [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

    Only the gradient with respect to ``inputs[1]`` is produced: the
    gradient arriving at ``outputs[0]`` is interpolated with the forward
    function's ``output_size``, ``mode`` and ``align_corners`` and then
    accumulated into, or copied over, ``inputs[1].grad``.

    Args:
        inputs: variables of the backward graph; index 1 receives the result.
        outputs: variables whose ``grad`` holds the incoming gradient.
        prop_down: per-input flags; nothing is done unless ``prop_down[1]``.
        accum: per-input flags; ``accum[1]`` selects ``+=`` over ``copy_from``.
    """
    if not prop_down[1]:
        return

    # output_size is the primary argument even if `scale` was specified.
    fwd_args = self.forward_func.info.args
    # NOTE(review): only these three options are forwarded; every other
    # interpolate option keeps the library default -- confirm that is intended.
    incoming = outputs[0].grad
    g_dy = inputs[1].grad
    g_dy_ = F.interpolate(incoming,
                          output_size=fwd_args["output_size"],
                          mode=fwd_args["mode"],
                          align_corners=fwd_args["align_corners"])
    if accum[1]:
        g_dy += g_dy_
    else:
        g_dy.copy_from(g_dy_)
def get_model(args, test=False):
    """
    Create the computation graph and its input variables.

    Builds ``image`` (3 channels), ``label`` and ``mask`` (1 channel each)
    with batch size and spatial size taken from ``args``, runs
    ``model.deeplabv3plus_model`` and, when the prediction shape differs from
    the label shape, linearly resizes the prediction to the label's spatial
    size. The loss is the mask-weighted softmax cross entropy divided by the
    sum of the mask.

    Returns:
        namedtuple ``Model(image, label, mask, pred, loss)``.
    """
    def _input(channels):
        return nn.Variable(
            [args.batch_size, channels, args.image_height, args.image_width])

    image = _input(3)
    label = _input(1)
    mask = _input(1)

    pred = model.deeplabv3plus_model(image, args.output_stride,
                                     args.num_class, test=test,
                                     fix_params=False)
    if pred.shape != label.shape:
        target_hw = (label.shape[2], label.shape[3])
        pred = F.interpolate(pred, output_size=target_hw, mode='linear')

    masked_ce = F.softmax_cross_entropy(pred, label, axis=1) * mask
    loss = F.sum(masked_ce) / F.sum(mask)

    Model = namedtuple('Model', ['image', 'label', 'mask', 'pred', 'loss'])
    return Model(image=image, label=label, mask=mask, pred=pred, loss=loss)
def Upsample(h, nmap_out, scope_name, scale=2):
    """Nearest-neighbour upsample followed by a spectrally-normalised conv.

    Inside ``scope_name`` the input is resized by ``(scale, scale)``, passed
    through a bias-free 3x3 convolution producing ``nmap_out * 2`` maps whose
    weight goes through ``PF.spectral_norm(dim=0)``, batch-normalised and fed
    to ``GLU``.
    """
    def spectral(weight):
        return PF.spectral_norm(weight, dim=0)

    with nn.parameter_scope(scope_name):
        enlarged = F.interpolate(h, scale=(scale, scale), mode="nearest")
        conv_out = PF.convolution(enlarged, nmap_out * 2, (3, 3), pad=(1, 1),
                                  apply_w=spectral, with_bias=False,
                                  name="conv1")
        gated = GLU(PF.batch_normalization(conv_out))
    return gated
def test_interpolate_nearest_double_backward(seed, inshape, outsize, scale,
                                             sdim_only, align_corners,
                                             half_pixel, half_pixel_for_nn,
                                             channel_last, ctx, func_name):
    """Check 2nd- and 3rd-order backward of nearest-mode ``F.interpolate``.

    The 2nd-order check runs ``backward_function_tester`` on
    ``F.interpolate`` itself; the 3rd-order check runs it on
    ``InterpolateDataGrad`` built with the equivalent ``output_size``.
    """
    if channel_last:
        if func_name == "Interpolate":
            pytest.skip(
                "Interpolate with channel_last is only supported in CUDA.")
        if sdim_only:
            pytest.skip(
                "Interpolate for spatial dimension only data is only supported for channel_first option."
            )

    from nbla_test_utils import backward_function_tester, grad_function_forward_function_output
    from nnabla.backward_function.interpolate import InterpolateDataGrad
    import nnabla as nn
    import math

    rng = np.random.RandomState(seed)
    inputs = [rng.randn(*inshape).astype(np.float32)]
    func_args = [
        scale, outsize, 'nearest', align_corners, half_pixel,
        half_pixel_for_nn, channel_last
    ]

    # 2nd-order
    backward_function_tester(rng, F.interpolate, inputs,
                             func_args=func_args,
                             atol_f=1e-6, atol_accum=1e-2, dstep=1e-3,
                             ctx=ctx)

    # 3rd-order
    # F.interpolate takes scale and output_size while InterpolateDataGrad
    # takes only output_size, so the size implied by `scale` is derived here.
    vinputs = [
        None if arr is None else nn.Variable(arr.shape) for arr in inputs
    ]
    y = F.interpolate(*vinputs, *func_args)

    x = inputs[0]
    if scale:
        ndim = len(scale)
        if channel_last:
            input_size = x.shape[-ndim - 1:-1]
        else:
            input_size = x.shape[-ndim:]
        output_size = [
            int(math.floor(s * d)) for d, s in zip(input_size, scale)
        ]
    else:
        output_size = outsize

    df = InterpolateDataGrad(ctx, output_size, *func_args[2:])
    df.xshape = x.shape
    ginputs = [rng.randn(*y.shape)]
    backward_function_tester(rng, df, ginputs, func_args=[], ctx=ctx,
                             atol_f=1e-6, atol_accum=5e-2,
                             non_accum_check=True)
def upsample(x, name, with_conv):
    """Double the two trailing dimensions of a 4-D ``x`` inside scope ``name``.

    The input is resized with nearest-neighbour interpolation (scale (2, 2));
    when ``with_conv`` is truthy the result additionally goes through
    ``conv(x, C, "upsample_conv")`` with ``C = x.shape[1]``. The output shape
    is asserted to be ``(B, C, 2 * H, 2 * W)`` after each step.
    """
    B, C, H, W = x.shape
    expected = (B, C, H * 2, W * 2)

    with nn.parameter_scope(name):
        out = F.interpolate(x, scale=(2, 2), mode="nearest",
                            align_corners=True)
        assert out.shape == expected
        if with_conv:
            out = conv(out, C, "upsample_conv")
            assert out.shape == expected
    return out
def upsample(x, factor, training, left_shape=None):
    """Upsample a 4-D or 5-D variable.

    * 4-D input: linear interpolation with ``align_corners=True``. While
      ``training`` the size is scaled by ``(factor, factor)``; otherwise the
      output size is ``(left_shape[2] // 4, left_shape[3] // 4)``.
    * 5-D input: a grouped deconvolution (one group per channel,
      ``group=planes``) whose fixed weights are a separable triangular
      (linear-interpolation) kernel derived from ``factor``.

    Args:
        x: input variable; only ``len(x.shape)`` of 4 or 5 is supported.
        factor: upsampling factor (assumed to be a positive int -- it is
            used to size numpy arrays).
        training: selects scale-based vs. ``left_shape``-based resizing for
            4-D inputs.
        left_shape: reference shape; required for 4-D inputs when
            ``training`` is falsy.

    Returns:
        The upsampled variable.

    Raises:
        ValueError: if ``x`` is neither 4-D nor 5-D, or if ``left_shape`` is
            missing when it is needed.
    """
    rank = len(x.shape)
    if rank not in (4, 5):
        # Previously this fell through to `return h` with `h` unbound.
        raise ValueError(
            'upsample expects a 4-D or 5-D input, got shape {}'.format(
                tuple(x.shape)))

    if rank == 4:
        if training:
            return F.interpolate(x, scale=(factor, factor), mode='linear',
                                 align_corners=True)
        if left_shape is None:
            raise ValueError(
                'left_shape is required when training is False')
        target_hw = (left_shape[2] // 4, left_shape[3] // 4)
        return F.interpolate(x, output_size=target_hw, mode='linear',
                             align_corners=True)

    # rank == 5: build the fixed interpolation kernel for the deconvolution.
    planes = x.shape[1]
    kernel_size = 2 * factor - factor % 2
    stride = int(factor)
    pad = int(math.ceil((factor - 1) / 2.))
    scale_factor = (kernel_size + 1) // 2
    if kernel_size % 2 == 1:
        center = scale_factor - 1
    else:
        center = scale_factor - 0.5

    # 1-D triangular taps; the 3-D kernel is their outer product, which is
    # exactly what the former element-by-element triple loop computed.
    taps = 1 - np.abs(np.arange(kernel_size) - center) / scale_factor
    bilinear_kernel = (taps[:, None, None]
                       * taps[None, :, None]
                       * taps[None, None, :]).astype(np.float32)

    w_filter = np.zeros([1, planes, kernel_size, kernel_size, kernel_size])
    w_filter[0, :] = bilinear_kernel  # same kernel for every channel

    return PF.deconvolution(x, planes,
                            kernel=(kernel_size, kernel_size, kernel_size),
                            pad=(pad, pad, pad),
                            stride=(stride, stride, stride),
                            w_init=w_filter, fix_parameters=True,
                            group=planes)
def upsample(h, maps, up, test=False, name="convblock"):
    """Upsample ``h`` by 2 with the method named by ``up``, then BN + ReLU.

    Args:
        h: input variable.
        maps: output maps of the convolution (the ``"deconv"`` option uses
            ``maps * 2`` for its deconvolution).
        up: one of ``"nearest"``, ``"linear"``, ``"unpooling"``, ``"deconv"``.
        test: batch statistics are used when this is falsy.
        name: name passed to the (de)convolution and the batch normalization.

    Raises:
        ValueError: if ``up`` is not one of the supported options.
    """
    if up in ("nearest", "linear"):
        # Both options differ only in the interpolation mode.
        h = PF.convolution(h, maps, (3, 3), (1, 1), name=name)
        h = F.interpolate(h, scale=(2, 2), mode=up)
    elif up == "unpooling":
        h = PF.convolution(h, maps, (3, 3), (1, 1), name=name)
        h = F.unpooling(h, (2, 2))
    elif up == "deconv":
        h = PF.deconvolution(h, maps * 2, (2, 2), (0, 0), (2, 2), name=name)
    else:
        raise ValueError(
            'Set "up" option in ["nearest", "linear", "unpooling", "deconv"]')

    normalized = PF.batch_normalization(h, batch_stat=not test, name=name)
    return F.relu(normalized)
def up_block(input, output_channels=64, stride=1, scope='up_block'):
    """Two 3x3 conv + leaky-ReLU(0.2) layers followed by a 2x resize.

    Everything runs inside parameter scope ``scope``. The final
    ``F.interpolate`` is called with ``channel_last=True`` and no explicit
    mode.
    """
    strides = (stride, stride)
    with nn.parameter_scope(scope):
        net = input
        for conv_name in ('conv_1', 'conv_2'):
            net = conv2d(net, output_channels, (3, 3), strides,
                         name=conv_name)
            net = F.leaky_relu(net, 0.2)
        net = F.interpolate(net, scale=(2, 2), channel_last=True)
    return net
def deform_input(inp, deformation):
    """Warp ``inp`` with ``deformation``, resizing the grid first if needed.

    ``deformation.shape[1:3]`` is compared with ``inp.shape[2:4]``. When they
    differ, the grid is transposed with (0, 3, 1, 2), linearly resized to the
    input's size and transposed back with (0, 2, 3, 1) before being passed to
    ``F.warp_by_grid(..., align_corners=True)``.
    """
    _, grid_h, grid_w, _ = deformation.shape
    _, _, img_h, img_w = inp.shape

    if grid_h != img_h or grid_w != img_w:
        grid = F.transpose(deformation, (0, 3, 1, 2))
        grid = F.interpolate(grid, output_size=(img_h, img_w), mode="linear",
                             align_corners=False, half_pixel=True)
        deformation = F.transpose(grid, (0, 2, 3, 1))

    return F.warp_by_grid(inp, deformation, align_corners=True)
def conv_up(x, out_ch, kernel=(3, 3), stride=(1, 1), pad=(1, 1), name=None):
    """Nearest-neighbour 2x resize of ``x`` followed by a convolution.

    ``kernel``, ``stride``, ``pad`` and ``name`` are forwarded to
    ``PF.convolution``; ``out_ch`` is its number of output maps.
    """
    enlarged = F.interpolate(x, scale=(2, 2), mode='nearest',
                             align_corners=False)
    return PF.convolution(enlarged, out_ch, kernel=kernel, pad=pad,
                          stride=stride, name=name)
def interpolate_data_grad_backward(inputs, output_size, mode,
                                   align_corners=True, half_pixel=False,
                                   half_pixel_for_nn=False,
                                   channel_last=False):
    """
    Backward of the interpolate data-grad function.

    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward
        function; only ``inputs[0]`` is used.
      output_size, mode, align_corners, half_pixel, half_pixel_for_nn,
        channel_last: forwarded unchanged to ``F.interpolate`` (``scale`` is
        passed as ``None``).

    Return:
      nn.Variable: ``inputs[0]`` interpolated with the given arguments.
    """
    return F.interpolate(inputs[0],
                         scale=None,
                         output_size=output_size,
                         mode=mode,
                         align_corners=align_corners,
                         half_pixel=half_pixel,
                         half_pixel_for_nn=half_pixel_for_nn,
                         channel_last=channel_last)
def loss_dis_real(logits, rec_imgs, part, img, lmd=1.0):
    """Discriminator loss on real images.

    The adversarial term is a hinge loss whose margin is randomised as
    ``0.2 * rand + 0.8`` (the original comment says this follows the official
    implementation). When ``lmd > 0`` a reconstruction term
    ``lmd * reconstruction_loss_lpips(rec_imgs, [img_128, img_part])`` is
    added, where ``img_128`` is ``img`` resized to 128x128 and ``img_part``
    is a 128x128 quadrant of ``img`` resized to 256x256, selected by
    thresholding ``part[0]`` / ``part[1]`` at 0.5.
    """
    noisy_margin = 0.2 * F.rand(shape=logits.shape) + 0.8
    loss = F.mean(F.relu(noisy_margin - logits))

    if not lmd > 0.0:
        return loss

    # Ground-truth targets for the reconstructions.
    img_128 = F.interpolate(img, output_size=(128, 128))
    img_256 = F.interpolate(img, output_size=(256, 256))

    # Pick the first or second half along axis 2, then along axis 3.
    first_on_ax2 = F.greater_scalar(part[0], 0.5)
    img_half = F.where(first_on_ax2,
                       img_256[:, :, :128, :], img_256[:, :, 128:, :])
    first_on_ax3 = F.greater_scalar(part[1], 0.5)
    img_part = F.where(first_on_ax3,
                       img_half[:, :, :, :128], img_half[:, :, :, 128:])

    # Integrated perceptual loss
    perceptual = reconstruction_loss_lpips(rec_imgs, [img_128, img_part])
    return loss + lmd * perceptual
def get_t_d(conf, r_inputs, d_data):
    """
    Create the real and fake temporal discriminators.

    Both branches warp the frames by ``d_data.t_vel``, crop the centre region
    and zero-pad it back, then concatenate it with the un-warped frames and
    the 4x linearly upsampled inputs before calling ``discriminator``.

    Returns:
        namedtuple ``temporal_disc`` with fields ``tdiscrim_real_output``,
        ``real_layers``, ``tdiscrim_fake_output`` and ``fake_layers``.
    """
    # Crop out the unstable border for the temporal discriminator
    # (details in the TecoGAN supplemental paper, per the original note).
    hr_size = conf.train.crop_size * 4
    crop_size_dt = int(hr_size * conf.gan.crop_dt)
    offset_dt = (hr_size - crop_size_dt) // 2
    crop_size_dt = hr_size - offset_dt * 2
    paddings = (0, 0, offset_dt, offset_dt, offset_dt, offset_dt, 0, 0)
    crop_end = offset_dt + crop_size_dt

    def _crop_and_pad(frames):
        # equivalent to tf.image.crop_to_bounding_box, then pad back
        cropped = frames[:, offset_dt:crop_end, offset_dt:crop_end, :]
        return F.pad(cropped, paddings)

    with nn.parameter_scope("discriminator"):
        # --- real branch ---
        real_warp = warp_by_flow(d_data.t_targets, d_data.t_vel)
        real_warp = space_to_depth_disc(real_warp, d_data.t_batch)
        real_warp = _crop_and_pad(real_warp)
        before_warp = space_to_depth_disc(d_data.t_targets, d_data.t_batch)
        t_input = space_to_depth_disc(r_inputs[:, :d_data.t_size, :, :, :],
                                      d_data.t_batch)
        # resizing using bilinear interpolation
        input_hi = F.interpolate(t_input, scale=(4, 4), mode='linear',
                                 channel_last=True)
        real_warp = F.concatenate(before_warp, real_warp, input_hi)
        tdiscrim_real_output, real_layers = discriminator(real_warp)

        # --- fake branch ---
        fake_warp = warp_by_flow(d_data.t_gen_output, d_data.t_vel)
        fake_warp = space_to_depth_disc(fake_warp, d_data.t_batch)
        fake_warp = _crop_and_pad(fake_warp)
        before_warp = space_to_depth_disc(d_data.t_gen_output,
                                          d_data.t_batch, inplace=False)
        fake_warp = F.concatenate(before_warp, fake_warp, input_hi)
        tdiscrim_fake_output, fake_layers = discriminator(fake_warp)

    temporal_disc = collections.namedtuple(
        'temporal_disc',
        'tdiscrim_real_output,'
        'real_layers, tdiscrim_fake_output, fake_layers')
    return temporal_disc(tdiscrim_real_output=tdiscrim_real_output,
                         real_layers=real_layers,
                         tdiscrim_fake_output=tdiscrim_fake_output,
                         fake_layers=fake_layers)
def spade(x, m, hidden_dim=128, kernel=(3, 3), norm_type="in"):
    """
    Spatially-Adaptive Normalization proposed in Semantic Image Synthesis
    with Spatially-Adaptive Normalization
    (https://arxiv.org/pdf/1903.07291.pdf).

    ``x`` is normalized with ``_normalize``; ``m`` is resized to the spatial
    size of ``x`` (nearest neighbour) and mapped by a shared conv + ReLU to
    hidden features, from which two further convolutions produce a scale
    (gamma) and a bias (beta). The result is ``normalized * gamma + beta``.

    Args:
        x (nn.Variable): Input variable for the spade layer (4-D).
        m (nn.Variable): Spatial condition variable, e.g. an object-id mask
            or segmentation (4-D). It is used to generate the adaptive scale
            (gamma) and adaptive bias (beta) applied after normalization.
        hidden_dim (int): Output maps of the first convolution applied to m.
        kernel (list of int): Kernel shape for all convolutions.
        norm_type (str) : Normalization type forwarded to ``_normalize``
            (the original note lists "in" and "bn" as supported).
    """
    # x: (N, Cx, H, W), m: (N, Cm, H, W)
    assert len(x.shape) == 4 and len(m.shape) == 4

    out_maps = x.shape[1]
    conv_kwargs = {"kernel": kernel, "pad": tuple(k // 2 for k in kernel)}

    with ps("spatial_adaptive_normalization"):
        normalized = _normalize(x, norm_type)
        m = F.interpolate(m, output_size=x.shape[2:], mode="nearest")

        with ps("shared"):
            shared = PF.convolution(m, hidden_dim,
                                    w_init=w_init(m, hidden_dim),
                                    **conv_kwargs)
            actv = F.relu(shared)

        with ps("gamma"):
            gamma = PF.convolution(actv, out_maps,
                                   w_init=w_init(actv, out_maps),
                                   **conv_kwargs)

        with ps("beta"):
            beta = PF.convolution(actv, out_maps,
                                  w_init=w_init(actv, out_maps),
                                  **conv_kwargs)

    return normalized * gamma + beta
def deeplabv3plus_model(x, output_stride, num_classes, test=False,
                        fix_params=False):
    """Build the DeepLabV3+ encoder/decoder graph on top of Xception-65.

    Encoder: ``xception.xception_65`` provides two endpoints. 'Decoder End
    Point 2' goes through atrous spatial pyramid pooling and a 1x1
    projection (conv + batch norm + ReLU, 256 maps). Decoder: the projected
    features are linearly resized to the spatial size of 'Decoder End
    Point 1' and handed to ``decoder`` together with those low-level
    features.

    Args:
        x: input image variable.
        output_stride: forwarded to ``atrous_spatial_pyramid_pooling``.
        num_classes: forwarded to ``decoder``.
        test: batch statistics are used when falsy.
        fix_params: forwarded as ``fix_parameters`` / ``fix_params``.

    Returns:
        The variable returned by ``decoder``.
    """
    # ---- Encoder ----
    # Get decoder endpoints from backbone
    endpoints = xception.xception_65(x, test=test, fix_params=fix_params)
    low_level_features = endpoints['Decoder End Point 1']

    encoder_output = atrous_spatial_pyramid_pooling(
        endpoints['Decoder End Point 2'], output_stride,
        test=test, fix_params=fix_params)

    with nn.parameter_scope("concat_projection"):
        encoder_output = PF.convolution(encoder_output, 256, (1, 1),
                                        with_bias=False,
                                        fix_parameters=fix_params)
        encoder_output = F.relu(
            PF.batch_normalization(encoder_output, batch_stat=not test,
                                   fix_parameters=fix_params))

    # ---- Decoder ----
    with nn.parameter_scope("decoder"):
        with nn.parameter_scope("upsample1"):
            # Height and width are taken from their own axes; using axis 2
            # for both only worked for square inputs.
            target_hw = (low_level_features.shape[2],
                         low_level_features.shape[3])
            upsampled = F.interpolate(encoder_output, output_size=target_hw,
                                      mode='linear')

        h = decoder(low_level_features, upsampled, num_classes,
                    test=test, fix_params=fix_params)

    return h
def hg_module(n, x):
    """Recursive hourglass module of order ``n``.

    Uses the callables in ``ops[n - 1]`` (taken from the enclosing scope):
    index 0 on the skip branch, index 1 after 2x2 max pooling, index 3 at the
    innermost level (``n == 1``, otherwise the function recurses), and index
    2 before the nearest-neighbour 2x resize. The two branches are summed.
    """
    stage = ops[n - 1]

    with nn.parameter_scope(f"{n - 1}.0.0"):
        skip = stage[0](x)

    pooled = F.max_pooling(x, kernel=(2, 2), stride=(2, 2))
    with nn.parameter_scope(f"{n - 1}.1.0"):
        pooled = stage[1](pooled)

    if n > 1:
        inner = hg_module(n - 1, pooled)
    else:
        with nn.parameter_scope(f"{n - 1}.3.0"):
            inner = stage[3](pooled)

    with nn.parameter_scope(f"{n - 1}.2.0"):
        inner = stage[2](inner)

    return skip + F.interpolate(inner, scale=(2, 2), mode="nearest")
def upblock(x, out_features, kernel_size=3, padding=1, groups=1, test=False,
            comm=None):
    """Nearest-neighbour 2x resize, convolution, batch norm and ReLU.

    Args:
        x: input variable; ``x.shape[1]`` is used as the fan-in.
        out_features: output maps of the convolution.
        kernel_size, padding, groups: convolution hyper-parameters.
        test: batch statistics are used when falsy.
        comm: when truthy, ``PF.sync_batch_normalization`` with this
            communicator (group ``'world'``) replaces plain batch norm.
    """
    bn_kwargs = dict(axes=[1], decay_rate=0.9, eps=1e-05,
                     batch_stat=not test)
    if comm:
        batchnorm = functools.partial(PF.sync_batch_normalization,
                                      comm=comm, group='world', **bn_kwargs)
    else:
        # 1 GPU
        batchnorm = functools.partial(PF.batch_normalization, **bn_kwargs)

    # Uniform initializers whose bounds are the He-forward std / sqrt(2).
    inmaps, outmaps = x.shape[1], out_features
    square_kernel = (kernel_size, kernel_size)
    k_w = I.calc_normal_std_he_forward(
        inmaps, outmaps, kernel=square_kernel) / np.sqrt(2.)
    k_b = I.calc_normal_std_he_forward(inmaps, outmaps) / np.sqrt(2.)
    w_init = I.UniformInitializer((-k_w, k_w))
    b_init = I.UniformInitializer((-k_b, k_b))

    out = F.interpolate(x, scale=(2, 2), mode="nearest")
    with nn.parameter_scope("upblock"):
        out = PF.convolution(out, outmaps=out_features, kernel=square_kernel,
                             pad=(padding, padding), group=groups,
                             w_init=w_init, b_init=b_init)
        out = batchnorm(out)
    return F.relu(out, inplace=True)
def __call__(self, img0, img1, normalize=False, mean_batch=False):
    """
    Compute the LPIPS distance between two image batches.

    Args:
       img0, img1(Variable): Variables containing images of identical shape.
                             N batch images can be used.
       normalize(bool): if True, assumes inputs are in [0., 1.] and scales
                        the inputs between [-1., +1.].
                        if False, assumes inputs are in [-1., +1.]
       mean_batch(bool): if True, the result is averaged over axis 0.

    Returns:
       Sum over feature layers of the per-layer distances (spatially
       upsampled when ``self.spatial`` is set, spatially averaged otherwise).
    """
    assert img0.shape == img1.shape, "img0 and img1 have different shape."
    assert isinstance(img0, nn.Variable), "img0 is not Variable."
    assert isinstance(img1, nn.Variable), "img1 is not Variable."

    if normalize:
        # scales the input between [-1., +1.]
        img0, img1 = 2 * img0 - 1, 2 * img1 - 1

    if self.apply_scale:
        img0 = (img0 - self._shift) / self._scale
        img1 = (img1 - self._shift) / self._scale

    layer_dists = compute_each_feat_dist(
        img0, img1, feat_extractor=self.feat_extractor)

    if self.spatial:
        # note that this upsampling method is different from the original
        # LPIPS. in the original implementation, it is
        # torch.nn.upsample(mode="bilinear")
        reduced = []
        for dist in layer_dists:
            ratio = 1. * img0.shape[2] / dist.shape[2]
            reduced.append(
                F.interpolate(dist * ratio, output_size=img0.shape[2:]))
    else:
        reduced = [F.mean(dist, axis=[2, 3], keepdims=True)
                   for dist in layer_dists]

    # returns N scores ((N, 1, 1, 1))
    lpips_val = F.sum(F.stack(*reduced), axis=0)
    return F.mean(lpips_val, axis=0) if mean_batch else lpips_val
def anti_alias_interpolate(input, channels, scale):
    """Gaussian-blur ``input`` and resize it by ``scale`` (nearest neighbour).

    No trainable parameters exist. For ``scale == 1.0`` the input is only
    passed through ``F.identity``. Otherwise a normalised 2-D Gaussian with
    ``sigma = (1 / scale - 1) / 2`` is applied as a depthwise convolution
    (``group=channels``) before the resize.

    Args:
        input: input variable.
        channels: number of channels of ``input``; used as the convolution
            group count and the kernel's leading dimension.
        scale: resize factor applied to both spatial axes.
    """
    if scale == 1.0:
        # no interpolation executed
        return F.identity(input)

    sigma = (1 / scale - 1) / 2
    # 2 * round(..) + 1 is always odd, so the padding is symmetric.
    kernel_size = 2 * round(sigma * 4) + 1
    half = kernel_size // 2

    kernel_size = [kernel_size, kernel_size]
    sigma = [sigma, sigma]

    # Coordinate grids along each kernel axis.
    xa = F.reshape(F.arange(0, kernel_size[0]), (-1, 1))
    ya = F.reshape(F.arange(0, kernel_size[1]), (1, -1))
    meshgrids = (F.tile(xa, (1, kernel_size[1])),
                 F.tile(ya, (kernel_size[0], 1)))

    # Separable Gaussian: product of the per-axis 1-D profiles.
    kernel = 1
    for size, std, mgrid in zip(kernel_size, sigma, meshgrids):
        mean = (size - 1) / 2
        kernel *= F.exp(-(mgrid - mean)**2 / (2 * std**2))
    kernel = kernel / F.sum(kernel, keepdims=True)

    # Reshape to depthwise convolutional weight
    kernel = F.reshape(kernel, (1, 1) + kernel.shape)
    kernel = F.broadcast(kernel, (channels, 1) + tuple(kernel_size))

    # if using the pre-computed kernel, no need to compute here.
    out = F.pad(input, (half, half, half, half))
    out = F.convolution(out, weight=kernel, group=channels)
    out = F.interpolate(out, scale=(scale, scale), mode="nearest")
    return out
def hour_glass(inp, depth, num_features):
    """Recursive hourglass block.

    The upper branch applies one ``conv_block`` to ``inp``. The lower branch
    average-pools by 2, applies a ``conv_block``, then either recurses with
    ``depth - 1`` (when ``depth > 1``) or applies the ``b2_plus`` block, and
    finally applies the ``b3`` block and a nearest-neighbour 2x resize. The
    two branches are summed.
    """
    def _block(prefix, h):
        with nn.parameter_scope(prefix + str(depth)):
            return conv_block(h, num_features, num_features)

    # Upper branch
    upper = _block('b1_', inp)

    # Lower branch
    lower = F.average_pooling(inp, (2, 2), stride=(2, 2))
    lower = _block('b2_', lower)

    if depth > 1:
        lower = hour_glass(lower, depth - 1, num_features)
    else:
        lower = _block('b2_plus_', lower)

    lower = _block('b3_', lower)
    restored = F.interpolate(lower, scale=(2, 2), mode='nearest')

    return upper + restored
def __init__(self, nf, image_shape, ext_upsamples=0):
    """Create the SPADE residual blocks of the generator.

    Args:
        nf: base number of filters; the blocks use 16x, 8x, 4x, 2x and 1x
            this value (and ``nf // 2`` for the optional ``up_4``).
        image_shape: stored as ``self.image_shape``.
        ext_upsamples: number of extra upsampling steps, converted with
            ``int`` and required to be in [0, 2]. ``self.num_upsample`` is
            ``5 + ext_upsamples``; ``up_4`` exists only when that exceeds 6.
    """
    extra = int(ext_upsamples)
    in_range = isinstance(extra, int) and 0 <= extra <= 2
    assert in_range, "ext_upsamples must be in the range of [0, 2]."

    self.nf = nf
    self.image_shape = image_shape
    self.num_upsample = 5 + extra

    block_widths = (
        ("head_0", 16 * nf),
        ("G_middle_0", 16 * nf),
        ("G_middle_1", 16 * nf),
        ("up_0", 8 * nf),
        ("up_1", 4 * nf),
        ("up_2", 2 * nf),
        ("up_3", nf),
    )
    for attr, width in block_widths:
        setattr(self, attr, SpadeResidualBlock(width))

    if self.num_upsample > 6:
        self.up_4 = SpadeResidualBlock(nf // 2)

    # Shared nearest-neighbour 2x upsampler.
    self.up = lambda x: F.interpolate(x, scale=(2, 2), mode="nearest")
def decoder(x, upsampled, num_classes, test=False, fix_params=False):
    """DeepLabV3+ decoder head.

    Projects the low-level features ``x`` to 48 maps (1x1 conv + batch norm
    + ReLU), concatenates them after ``upsampled`` on axis 1, applies two
    256-map separable convolutions and a 1x1 convolution to ``num_classes``
    maps (no activation), then linearly resizes each spatial axis from
    ``s`` to ``4 * s - 3``.

    Args:
        x: low-level feature variable.
        upsampled: encoder output already resized to the spatial size of
            ``x`` (required by the concatenation).
        num_classes: number of output maps.
        test: batch statistics are used when falsy.
        fix_params: forwarded as ``fix_parameters`` / ``fix_params``.

    Returns:
        The resized logits variable.
    """
    # Project low-level features
    with nn.parameter_scope("feature_projection0"):
        h = PF.convolution(x, 48, (1, 1), with_bias=False,
                           fix_parameters=fix_params)
        h = F.relu(
            PF.batch_normalization(h, batch_stat=not test,
                                   fix_parameters=fix_params))

    h = F.concatenate(upsampled, h, axis=1)

    for i in range(2):
        with nn.parameter_scope("decoder_conv" + str(i)):
            h = xception.separable_conv_with_bn(h, 256, last_block=True,
                                                eps=1e-05, out=True,
                                                test=test,
                                                fix_params=fix_params)

    with nn.parameter_scope("logits/affine"):
        h = PF.convolution(h, num_classes, (1, 1), with_bias=True,
                           fix_parameters=fix_params)  # no activation

    with nn.parameter_scope("upsample2"):
        # Height and width each come from their own axis; using axis 2 for
        # both forced a square output regardless of the input aspect ratio.
        out_h = h.shape[2] * 4 - 3
        out_w = h.shape[3] * 4 - 3
        h = F.interpolate(h, output_size=(out_h, out_w), mode='linear')

    return h
def graph(x0):
    """Build a two-node graph: F.swish -> F.interpolate.

    The swish output is flagged with ``recompute=True`` before it is resized
    with ``scale=(2,)``.
    """
    activated = F.swish(x0)
    activated.apply(recompute=True)
    return F.interpolate(activated, scale=(2,))
def visualize(self, driving, source, out):
    """Assemble a uint8 image grid from the model outputs in ``out``.

    Panels are collected in this order: source with keypoints; transformed
    frame with keypoints (if present); driving with keypoints; deformed
    image (if present); prediction with normalized keypoints (if present)
    and the plain prediction; occlusion map (if present); and, for every
    sparse deformation, the deformed image and its (coloured) mask followed
    by the sum of all coloured masks. The panels are passed to
    ``self.create_image_grid`` and the result is scaled by 255 and cast to
    ``np.uint8``.

    Args:
        driving, source: image batches as ``nn.Variable`` or arrays; axis 1
            is moved to the last position for display.
        out: dict of model outputs (variables exposing ``.d``).
    """
    def to_last_axis(batch):
        return np.transpose(batch, [0, 2, 3, 1])

    def resize_to_source(var):
        # `source` is already in its transposed layout when this is called.
        return F.interpolate(var, output_size=source.shape[1:3],
                             mode='nearest')

    images = []

    # Source image with keypoints
    if isinstance(source, nn.Variable):
        source = source.d
    kp_source = out['kp_source']['value'].d
    source = to_last_axis(source)
    images.append((source, kp_source))

    # Equivariance visualization, not used when animation (eval)
    if 'transformed_frame' in out:
        transformed = to_last_axis(out['transformed_frame'].d)
        transformed_kp = out['transformed_kp']['value'].d
        images.append((transformed, transformed_kp))

    # Driving image with keypoints
    kp_driving = out['kp_driving']['value'].d
    if isinstance(driving, nn.Variable):
        driving = driving.d
    driving = to_last_axis(driving)
    images.append((driving, kp_driving))

    # Deformed image
    if 'deformed' in out:
        images.append(to_last_axis(out['deformed'].d))

    # Result with and without keypoints
    prediction = to_last_axis(out['prediction'].d)
    if 'kp_norm' in out:
        kp_norm = out['kp_norm']['value'].d
        images.append((prediction, kp_norm))
    images.append(prediction)

    # Occlusion map
    if 'occlusion_map' in out:
        with nn.auto_forward():
            occlusion_map = F.tile(out['occlusion_map'], (1, 3, 1, 1))
            occlusion_map = resize_to_source(occlusion_map)
        images.append(to_last_axis(occlusion_map.d))

    # Deformed images according to each individual transform
    if 'sparse_deformed' in out:
        full_mask = []
        num_parts = out['sparse_deformed'].shape[1]
        for i in range(num_parts):
            with nn.auto_forward():
                image = resize_to_source(out['sparse_deformed'][:, i])
                mask = F.tile(out['mask'][:, i:(i + 1)], (1, 3, 1, 1))
                mask = resize_to_source(mask)
            image = to_last_axis(image.d)
            mask = to_last_axis(mask.d)

            # Index 0 gets black; the others are spread over the colormap.
            if i == 0:
                color = np.array((0, 0, 0))
            else:
                color = np.array(self.colormap(
                    (i - 1) / (num_parts - 1)))[:3]
            color = color.reshape((1, 1, 1, 3))

            images.append(image)
            images.append(mask if i == 0 else mask * color)
            full_mask.append(mask * color)

        images.append(sum(full_mask))

    image = self.create_image_grid(*images)
    image = (255 * image).astype(np.uint8)
    return image
def pcd_align(fea1, fea2):
    """
    Alignment module using Pyramid, Cascading and Deformable convolution
    with 3 pyramid levels [L1, L2, L3].

    The same coarse-to-fine alignment is run twice: once aligning ``fea1``
    using ``fea2`` as the neighbour (parameters suffixed ``_1``) and once
    with the roles swapped (suffix ``_2``). The two aligned L1 features are
    concatenated along axis 1.

    Args:
        fea1, fea2: indexable pyramids; index 0 is L1 (original size),
            index 1 is L2 and index 2 is L3 (per the original comments, 1/2
            and 1/4 spatial size).

    Returns:
        Concatenation (axis 1) of the two aligned L1 feature variables.
    """
    num_filters = 64
    deformable_groups = 8
    kernel_sz, stride_ln, pad_ln = 3, 1, 1

    def conv(h, name):
        return conv2d(h, num_filters, kernel_sz, stride_ln, pad_ln,
                      bias=True, name=name)

    def up2x(h):
        return F.interpolate(h, scale=(2, 2), mode='linear',
                             align_corners=False, half_pixel=True)

    def deform_conv(fea, offset_input, name):
        """ deformable convolution block """
        with nn.parameter_scope(name):
            channels_ = deformable_groups * 3 * kernel_sz * kernel_sz
            conv_offset_mask = conv2d(offset_input, channels_, kernel_sz,
                                      stride_ln, pad_ln, bias=True,
                                      name='conv_offset_mask')
            # Integer division: the result is used as a slice bound, and
            # `/` would produce a float under Python 3.
            channels = channels_ // 3
            offset = conv_offset_mask[:, :2 * channels, :, :]
            mask = F.sigmoid(
                conv_offset_mask[:, 2 * channels:3 * channels, :, :])
            return PF.deformable_convolution(
                fea, num_filters, (kernel_sz, kernel_sz), offset, mask,
                deformable_group=deformable_groups,
                stride=(stride_ln, stride_ln), pad=(pad_ln, pad_ln),
                dilation=(1, 1), with_bias=True)

    def align(ref, nbr, suffix):
        """Align pyramid ``ref`` to ``nbr``; parameter names end in suffix."""
        def tag(base):
            return '{}_{}'.format(base, suffix)

        # L3: level 3, 1/4 spatial size
        l3_offset = F.concatenate(ref[2], nbr[2], axis=1)
        l3_offset = F.leaky_relu(conv(l3_offset, tag('l3_offset_conv1')))
        l3_offset = F.leaky_relu(conv(l3_offset, tag('l3_offset_conv2')))
        l3_fea = F.leaky_relu(
            deform_conv(ref[2], l3_offset, name=tag('l3_dcnpack')))

        # L2: level 2, 1/2 spatial size
        l2_offset = F.concatenate(ref[1], nbr[1], axis=1)
        l2_offset = F.leaky_relu(conv(l2_offset, tag('l2_offset_conv1')))
        # Offsets from the coarser level are doubled after the 2x resize.
        l3_offset = up2x(l3_offset)
        l2_offset = F.leaky_relu(
            conv(F.concatenate(l2_offset, l3_offset * 2, axis=1),
                 tag('l2_offset_conv2')))
        l2_offset = F.leaky_relu(conv(l2_offset, tag('l2_offset_conv3')))
        l2_fea = deform_conv(ref[1], l2_offset, name=tag('l2_dcnpack'))
        l3_fea = up2x(l3_fea)
        l2_fea = F.leaky_relu(
            conv(F.concatenate(l2_fea, l3_fea, axis=1), tag('l2_fea_conv')))

        # L1: level 1, original spatial size
        l1_offset = F.concatenate(ref[0], nbr[0], axis=1)
        l1_offset = F.leaky_relu(conv(l1_offset, tag('l1_offset_conv1')))
        l2_offset = up2x(l2_offset)
        l1_offset = F.leaky_relu(
            conv(F.concatenate(l1_offset, l2_offset * 2, axis=1),
                 tag('l1_offset_conv2')))
        l1_offset = F.leaky_relu(conv(l1_offset, tag('l1_offset_conv3')))
        l1_fea = deform_conv(ref[0], l1_offset, name=tag('l1_dcnpack'))
        l2_fea = up2x(l2_fea)
        # No activation on the final fusion convolution.
        return conv(F.concatenate(l1_fea, l2_fea, axis=1),
                    tag('l1_fea_conv'))

    with nn.parameter_scope('pcd_align'):
        aligned = [
            align(fea1, fea2, 1),  # fea1
            align(fea2, fea1, 2),  # fea2
        ]

    return F.concatenate(*aligned, axis=1)
def Discriminator(img, label="real", scope_name="Discriminator", ndf=64):
    """Discriminator with an auxiliary small-image path and decoders.

    Args:
        img: either a single image variable, or a list/tuple
            ``(image, small_image)``. For a single image the small image is
            obtained by resizing it to 128x128.
        label: when equal to ``"real"`` the reconstructions and the random
            crop selectors are returned in addition to the logits.
        scope_name: parameter scope enclosing all layers.
        ndf: base number of feature maps.

    Returns:
        ``logits`` (large-path and small-path logits concatenated on axis 1)
        or, for ``label == "real"``, the tuple
        ``(logits, [rec_img_big, rec_img_small, rec_img_part],
        [part_ax2, part_ax3])``.
    """
    with nn.parameter_scope(scope_name):
        # isinstance also accepts tuples and list subclasses, which the
        # former exact-type check sent to F.interpolate as a single image.
        if isinstance(img, (list, tuple)):
            img, img_small = img[0], img[1]
        else:
            img_small = F.interpolate(img, output_size=(128, 128))

        def sn_w(w):
            return PF.spectral_norm(w, dim=0)

        def sn_conv(inp, maps, kernel, name, **kwargs):
            # Bias-free convolution with a spectrally-normalised weight.
            return PF.convolution(inp, maps, kernel, apply_w=sn_w,
                                  with_bias=False, name=name, **kwargs)

        # InitLayer: -> 256x256 (per the original note); the layers depend
        # on the size of axis 2 of the input image.
        with nn.parameter_scope("init"):
            h = img
            if img.shape[2] == 1024:
                h = sn_conv(h, ndf // 8, (4, 4), "conv1",
                            stride=(2, 2), pad=(1, 1))
                h = F.leaky_relu(h, 0.2)
                h = sn_conv(h, ndf // 4, (4, 4), "conv2",
                            stride=(2, 2), pad=(1, 1))
                h = PF.batch_normalization(h)
                h = F.leaky_relu(h, 0.2)
            elif img.shape[2] == 512:
                h = sn_conv(h, ndf // 4, (4, 4), "conv2",
                            stride=(2, 2), pad=(1, 1))
                h = F.leaky_relu(h, 0.2)
            else:
                h = sn_conv(h, ndf // 4, (3, 3), "conv3", pad=(1, 1))
                h = F.leaky_relu(h, 0.2)

        # Calc base features
        f_256 = h
        f_128 = DownsampleComp(f_256, ndf // 2, "down256->128")
        f_64 = DownsampleComp(f_128, ndf * 1, "down128->64")
        f_32 = DownsampleComp(f_64, ndf * 2, "down64->32")

        # Apply SLE
        f_32 = SLE(f_32, f_256, "sle256->32")
        f_16 = DownsampleComp(f_32, ndf * 4, "down32->16")
        f_16 = SLE(f_16, f_128, "sle128->16")
        f_8 = DownsampleComp(f_16, ndf * 16, "down16->8")
        f_8 = SLE(f_8, f_64, "sle64->8")

        # Conv + BN + LeakyReLU + Conv -> logits
        with nn.parameter_scope("last"):
            h = sn_conv(f_8, ndf * 16, (1, 1), "conv1")
            h = PF.batch_normalization(h)
            h = F.leaky_relu(h, 0.2)
            logit_large = sn_conv(h, 1, (4, 4), "conv2")

        # Another path: "down_from_small" in the official code
        with nn.parameter_scope("down_from_small"):
            h_s = sn_conv(img_small, ndf // 2, (4, 4), "conv1",
                          stride=(2, 2), pad=(1, 1))
            h_s = F.leaky_relu(h_s, 0.2)
            h_s = Downsample(h_s, ndf * 1, "dfs64->32")
            h_s = Downsample(h_s, ndf * 2, "dfs32->16")
            h_s = Downsample(h_s, ndf * 4, "dfs16->8")
            fea_dec_small = h_s
            logit_small = sn_conv(h_s, 1, (4, 4), "conv2")

        # Concatenate logits
        logits = F.concatenate(logit_large, logit_small, axis=1)

        # Reconstruct images
        rec_img_big = SimpleDecoder(f_8, "dec_big")
        rec_img_small = SimpleDecoder(fea_dec_small, "dec_small")

        # Randomly pick one half of f_16 along axis 2, then along axis 3
        # (one random draw per batch item).
        part_ax2 = F.rand(shape=(img.shape[0], ))
        part_ax3 = F.rand(shape=(img.shape[0], ))
        f_16_ax2 = F.where(F.greater_scalar(part_ax2, 0.5),
                           f_16[:, :, :8, :], f_16[:, :, 8:, :])
        f_16_part = F.where(F.greater_scalar(part_ax3, 0.5),
                            f_16_ax2[:, :, :, :8], f_16_ax2[:, :, :, 8:])
        rec_img_part = SimpleDecoder(f_16_part, "dec_part")

    if label == "real":
        return (logits,
                [rec_img_big, rec_img_small, rec_img_part],
                [part_ax2, part_ax3])
    return logits