def layer4_1(x):
    """Run two pad/conv/instance-norm/PReLU stages, then upsample by 2.

    Each stage reflect-pads the last two axes by one element, applies a
    3x3 stride-1 convolution (256 then 64 output maps), instance
    normalization without affine parameters over ``channel_axis=1`` and
    a PReLU. The result is enlarged with nearest-neighbour interpolation
    at scale (2, 2).
    """
    # (output maps, convolution parameter name, PReLU parameter name)
    stages = (
        (256, 'layer4_1.1', 'layer4_1.3'),
        (64, 'layer4_1.5', 'layer4_1.7'),
    )
    h = x
    for out_maps, conv_name, prelu_name in stages:
        h = F.pad(h, (1, 1, 1, 1), 'reflect')
        h = PF.convolution(
            h, out_maps, kernel=(3, 3), stride=(1, 1), name=conv_name)
        h = F.instance_normalization(
            h, gamma=None, beta=None, channel_axis=1)
        h = PF.prelu(h, name=prelu_name)
    return F.interpolate(
        h, scale=(2, 2), mode='nearest', align_corners=False)
def upfirdn_2d(x, k, upx=1, upy=1, downx=1, downy=1,
               padx0=0, padx1=0, pady0=0, pady1=0):
    """Upsample, pad, FIR-filter and downsample a 4-D input.

    The input is indexed as ``(batch, height, width, minor)``: axis 1 is
    treated as height, axis 2 as width and axis 3 as the minor dimension.

    Args:
        x (nn.Variable or nn.NdArray): 4-D input.
        k (array-like): 2-D filter of shape ``(kernelH, kernelW)``; it is
            converted to float32.
        upx, upy (int): Zero-insertion upsampling factors along width and
            height.
        downx, downy (int): Strides used to subsample the filtered result.
        padx0, padx1, pady0, pady1 (int): Padding before/after along width
            and height. Negative values crop instead of pad.

    Returns:
        The filtered result, laid out like the input (minor axis last).
    """
    # The original assertion `isinstance(x, nn.Variable) or (x, nn.NdArray)`
    # was always true because a non-empty tuple is truthy.
    assert isinstance(x, (nn.Variable, nn.NdArray))
    k = np.asarray(k, dtype=np.float32)
    assert x.ndim == 4
    inH = x.shape[1]
    inW = x.shape[2]
    minorDim = x.shape[3]
    kernelH, kernelW = k.shape
    assert inW >= 1 and inH >= 1
    assert kernelW >= 1 and kernelH >= 1
    assert isinstance(upx, int) and isinstance(upy, int)
    assert isinstance(downx, int) and isinstance(downy, int)
    assert isinstance(padx0, int) and isinstance(padx1, int)
    assert isinstance(pady0, int) and isinstance(pady1, int)

    # Upsample by inserting zeros after every sample.
    x = F.reshape(x, [-1, inH, 1, inW, 1, minorDim], inplace=False)
    x = F.pad(x, [0, 0, 0, 0, 0, upy - 1, 0, 0, 0, upx - 1, 0, 0])
    x = F.reshape(x, [-1, inH * upy, inW * upx, minorDim], inplace=False)

    # Positive pads are applied with F.pad, negative pads by cropping.
    x = F.pad(x, [
        0, 0,
        max(pady0, 0), max(pady1, 0),
        max(padx0, 0), max(padx1, 0),
        0, 0,
    ])
    x = x[:,
          max(-pady0, 0):x.shape[1] - max(-pady1, 0),
          max(-padx0, 0):x.shape[2] - max(-padx1, 0),
          :]

    # Convolve with the filter: fold the minor axis into the batch axis so
    # that a single-channel convolution handles every plane.
    x = F.transpose(x, [0, 3, 1, 2])
    x = F.reshape(
        x,
        [-1, 1, inH * upy + pady0 + pady1, inW * upx + padx0 + padx1],
        inplace=False)
    # The kernel is flipped along both axes before being handed to
    # F.convolution.
    w = nn.Variable.from_numpy_array(k[np.newaxis, np.newaxis, ::-1, ::-1])
    x = F.convolution(x, w)
    x = F.reshape(x, [
        -1,
        minorDim,
        inH * upy + pady0 + pady1 - kernelH + 1,
        inW * upx + padx0 + padx1 - kernelW + 1,
    ], inplace=False)
    x = F.transpose(x, [0, 2, 3, 1])

    if downx == 1 and downy == 1:
        return x
    return x[:, ::downy, ::downx, :]
def inst_to_boundary(inst_label):
    """Build a boundary mask from an instance label map.

    The label map is padded by one element on each side of its last two
    axes and compared against four copies padded asymmetrically, which
    shifts them by one position in each of the four axis directions. A
    position is flagged when any shifted copy differs from the centred
    one. The one-element border is cropped before returning, so the
    result has shape (N, H, W) per the original comment.
    """
    centred = F.pad(inst_label, (1, 1, 1, 1))
    boundary = F.constant(val=0, shape=centred.shape)
    # Asymmetric pad widths: each shifts the map by one along one axis.
    shifted_widths = (
        (1, 1, 0, 2),
        (1, 1, 2, 0),
        (0, 2, 1, 1),
        (2, 0, 1, 1),
    )
    for widths in shifted_widths:
        differs = F.not_equal(centred, F.pad(inst_label, widths))
        boundary = F.logical_or(boundary, differs)
    return boundary[:, 1:-1, 1:-1]  # (N, H, W)
def residual_block(self, x, o_channels):
    """Add a two-convolution residual branch to ``x``.

    The branch pads its input with ``self.padding_type`` (widths come
    from ``get_symmetric_padwidth(1, ...)``), applies a 3x3 convolution
    followed by ``self.instance_norm_relu``, then pads again and applies
    a second 3x3 convolution followed by instance normalization.

    Args:
        x: Input variable; it is added to the branch output, so the
            branch must produce a matching shape.
        o_channels: Number of output maps for both convolutions.

    Returns:
        ``x`` plus the branch output.
    """
    pad_width = get_symmetric_padwidth(1, channel_last=self.channel_last)

    def _padded(v):
        return F.pad(v, pad_width=pad_width, mode=self.padding_type)

    with nn.parameter_scope("residual_1"):
        branch = PF.convolution(
            _padded(x), o_channels, (3, 3), **self.conv_opts)
        branch = self.instance_norm_relu(branch)

    with nn.parameter_scope("residual_2"):
        branch = PF.convolution(
            _padded(branch), o_channels, (3, 3), **self.conv_opts)
        branch = PF.instance_normalization(branch, **self.norm_opts)

    return x + branch
def dyn_sep_up_operation(x, dr_k_v, dr_k_h, k_sz, sf):
    """Dynamic separable upsampling with 1D separable local kernels.

    Args:
        x: Input of shape [B, H, W, C]. Only channels 0, 1 and 2 are
            read (the original comments call them Y, U, V).
        dr_k_v: Vertical kernels, [B, H, W, k_sz*sf*sf] (the original
            docstring states 41*sf*sf; presumably k_sz is 41 - confirm).
        dr_k_h: Horizontal kernels, same layout as ``dr_k_v``.
        k_sz: Length of each 1D local kernel.
        sf: Upscaling factor.

    Returns:
        Output of shape [B, H*sf, W*sf, C] according to the original
        docstring.
    """
    in_shape = x.shape
    half = k_sz // 2  # local filter pad size
    n_sub = sf ** 2   # sub-pixel positions per input pixel
    acc_shape = (in_shape[0], in_shape[1], in_shape[2], in_shape[3] * n_sub)

    # Zero-initialised accumulators, both [B, H, W, C*sf*sf].
    out_v = nn.Variable(acc_shape)
    out_v.data.zero()
    out_h = nn.Variable(acc_shape)
    out_h.data.zero()

    # Pad along axis 1 only, then repeat every channel sf*sf times.
    img_pad = F.pad(x, (0, 0, half, half, 0, 0, 0, 0))
    plane_shape = (img_pad.shape[0], img_pad.shape[1], img_pad.shape[2], 1)
    tiled_planes = []
    for ch in range(3):
        plane = F.reshape(img_pad[:, :, :, ch], plane_shape)
        tiled_planes.append(F.tile(plane, [1, 1, 1, n_sub]))
    img_pad = F.concatenate(*tiled_planes, axis=3)

    # vertical 1D filter
    for tap in range(k_sz):
        weights = F.tile(
            dr_k_v[:, :, :, tap:k_sz * n_sub:k_sz], [1, 1, 1, 3])
        out_v = out_v + img_pad[:, tap:tap + in_shape[1], :, :] * weights

    # Pad along axis 2 for the horizontal pass.
    img_pad = F.pad(out_v, (0, 0, 0, 0, half, half, 0, 0))

    # horizontal 1D filter
    for tap in range(k_sz):
        weights = F.tile(
            dr_k_h[:, :, :, tap:k_sz * n_sub:k_sz], [1, 1, 1, 3])
        out_h = out_h + img_pad[:, :, tap:tap + in_shape[2], :] * weights

    # depth to space upsampling (YUV), one channel group at a time
    out = depth_to_space(out_h[:, :, :, 0:n_sub], sf)
    for ch in (1, 2):
        upsampled = depth_to_space(
            out_h[:, :, :, ch * n_sub:(ch + 1) * n_sub], sf)
        out = F.concatenate(out, upsampled, axis=3)
    return out
def conv(x, channels, kernel=4, stride=2, pad=0, pad_type='zero',
         use_bias=True, scope='conv_0'):
    """Spectrally-normalised channel-last convolution for the discriminator.

    Args:
        x: Input variable; ``x.shape[1]`` is used as the height and
            ``x.shape[-1]`` as the channel count.
        channels: Number of output maps.
        kernel: Square kernel size.
        stride: Stride applied along both spatial axes.
        pad: When positive, the total padding is derived from it (see
            below) and split between the two sides of each spatial axis.
        pad_type: ``'zero'`` or ``'reflect'``; other values skip padding.
        use_bias: Whether the convolution has a bias term.
        scope: Parameter scope name.

    Returns:
        The convolution output.
    """
    weight_shape = (channels, kernel, kernel, x.shape[-1])
    w_init = truncated_normal(weight_shape, mean=0.0, std=0.02)
    b_init = I.ConstantInitializer(0.)

    def apply_w(w):
        return PF.spectral_norm(w, dim=0)

    with nn.parameter_scope(scope):
        if pad > 0:
            height = x.shape[1]
            # Total padding: doubled when the height divides evenly by the
            # stride, otherwise derived from the remainder.
            if height % stride == 0:
                total = pad * 2
            else:
                total = max(kernel - (height % stride), 0)

            pad_top = total // 2
            pad_bottom = total - pad_top
            pad_left = total // 2
            pad_right = total - pad_left
            pad_width = (0, 0, pad_top, pad_bottom, pad_left, pad_right, 0, 0)

            if pad_type == 'zero':
                x = F.pad(x, pad_width)
            elif pad_type == 'reflect':
                x = F.pad(x, pad_width, mode='reflect')

        x = PF.convolution(
            x, channels,
            kernel=(kernel, kernel),
            stride=(stride, stride),
            apply_w=apply_w,
            w_init=w_init,
            b_init=b_init,
            with_bias=use_bias,
            channel_last=True)
    return x
def get_t_d(conf, r_inputs, d_data):
    """Create the real and fake temporal discriminators.

    Both the target frames and the generated frames are warped by
    ``d_data.t_vel``, centre-cropped and zero-padded back, concatenated
    with their un-warped version and the 4x bilinearly upscaled inputs,
    and fed to ``discriminator`` under the "discriminator" scope.

    Returns:
        A ``temporal_disc`` namedtuple with fields ``tdiscrim_real_output``,
        ``real_layers``, ``tdiscrim_fake_output`` and ``fake_layers``.
    """
    # to crop out unstable part for temporal discriminator,
    # details in TecoGAN supplemental paper
    full_size = conf.train.crop_size * 4
    offset_dt = (full_size - int(full_size * conf.gan.crop_dt)) // 2
    crop_size_dt = full_size - offset_dt * 2
    paddings = (0, 0, offset_dt, offset_dt, offset_dt, offset_dt, 0, 0)

    def _crop_then_pad(frames):
        # equivalent to tf.image.crop_to_bounding_box, followed by padding
        # the removed border back
        cropped = frames[:,
                         offset_dt:offset_dt + crop_size_dt,
                         offset_dt:offset_dt + crop_size_dt,
                         :]
        return F.pad(cropped, paddings)

    with nn.parameter_scope("discriminator"):
        real_warp = warp_by_flow(d_data.t_targets, d_data.t_vel)
        real_warp = space_to_depth_disc(real_warp, d_data.t_batch)
        real_warp = _crop_then_pad(real_warp)
        before_warp = space_to_depth_disc(d_data.t_targets, d_data.t_batch)
        t_input = space_to_depth_disc(
            r_inputs[:, :d_data.t_size, :, :, :], d_data.t_batch)
        # resizing using bilinear interpolation
        input_hi = F.interpolate(
            t_input, scale=(4, 4), mode='linear', channel_last=True)
        real_warp = F.concatenate(before_warp, real_warp, input_hi)
        tdiscrim_real_output, real_layers = discriminator(real_warp)

        fake_warp = warp_by_flow(d_data.t_gen_output, d_data.t_vel)
        fake_warp = space_to_depth_disc(fake_warp, d_data.t_batch)
        fake_warp = _crop_then_pad(fake_warp)
        before_warp = space_to_depth_disc(
            d_data.t_gen_output, d_data.t_batch, inplace=False)
        fake_warp = F.concatenate(before_warp, fake_warp, input_hi)
        tdiscrim_fake_output, fake_layers = discriminator(fake_warp)

    temporal_disc = collections.namedtuple(
        'temporal_disc',
        'tdiscrim_real_output,'
        'real_layers, tdiscrim_fake_output, fake_layers')
    return temporal_disc(
        tdiscrim_real_output=tdiscrim_real_output,
        real_layers=real_layers,
        tdiscrim_fake_output=tdiscrim_fake_output,
        fake_layers=fake_layers)
def pad_replicate(x):
    """Pad axis 2 of a 4-D input by one element on each side, replicating edges.

    The input is first reflect-padded to obtain the enlarged shape; the
    two new slices along axis 2 are then overwritten with the original
    first and last slices.
    """
    first_slice = x[:, :, 0, :]
    last_slice = x[:, :, -1, :]
    padded = F.pad(x, (1, 1, 0, 0), 'reflect')
    # Overwrite the reflected borders with the edge values.
    padded[:, :, 0, :] = first_slice
    padded[:, :, -1, :] = last_slice
    return padded
def backward_impl(self, inputs, outputs, prop_down, accum):
    """Propagate the gradient of the pad-backward function to ``dy``.

    Args:
        inputs: ``[inputs_fwd_graph] + [inputs_bwd_graph]`` (or with the
            forward outputs in between). Here ``inputs[0]`` is ``x0`` and
            ``inputs[1]`` is ``dy``.
        outputs: ``outputs[0]`` is ``dx0``.
        prop_down: Per-input flags; only index 1 (``dy``) is handled.
        accum: Per-input flags; when ``accum[1]`` is true the result is
            added to the existing gradient instead of overwriting it.
    """
    if not prop_down[1]:
        return

    fwd_args = self.forward_func.info.args
    pad_width = fwd_args["pad_width"]
    mode = fwd_args["mode"]

    g_dy = inputs[1].grad      # gradient w.r.t. dy, written below
    g_dx0 = outputs[0].grad    # incoming gradient w.r.t. dx0

    # dx0 does not depend on the padded border of dy, so the gradient
    # there must be zero. The forward `constant_value` is deliberately
    # not reused; it would leak a non-zero constant into the gradient.
    g_dy_ = F.pad(g_dx0, pad_width, mode, 0)
    if accum[1]:
        g_dy += g_dy_
    else:
        g_dy.copy_from(g_dy_)
def conv1d(inputs, kernel_size, channels, activation, is_training, scope):
    r"""Create a 1D convolutional layer followed by batch normalization.

    Args:
        inputs (nn.Variable): The input sequence of shape B x C x T.
        kernel_size (int): The kernel size. For even sizes the last axis
            is zero-padded by one extra element at its end before the
            convolution.
        channels (int): Number of output maps; it is passed straight to
            ``PF.convolution``.
        activation (nn.function): Activation applied after the
            convolution, or ``None`` to skip it.
        is_training (bool): If `is_training` is `True`, then batch_stat
            will be computed.
        scope (str): The parameter scope name.

    Returns:
        nn.Variable: Output variable.
    """
    kernel_is_even = kernel_size % 2 == 0
    if kernel_is_even:
        # Six widths cover three axes; only the end of the last axis grows.
        inputs = F.pad(
            inputs, (0, 0, 0, 0, 0, 1), mode='constant', constant_value=0)

    half_kernel = (kernel_size - 1) // 2
    with nn.parameter_scope(scope):
        out = PF.convolution(
            inputs, channels,
            kernel=(kernel_size, ),
            pad=(half_kernel, ),
            with_bias=False)
        if activation is not None:
            out = activation(out)
        out = PF.batch_normalization(out, batch_stat=is_training)
    return out
def factorized_reduction(x, output_filter, scope, test, is_search):
    """Apply spatial reduction to the input variable.

    After a ReLU, two stride-2 1x1 convolutions are applied: one on the
    input itself and one on a copy shifted by one element along the last
    two axes (zero-padded at the end, then sliced from index 1). Each
    produces ``output_filter // 2`` maps; they are concatenated on axis 1
    and batch-normalized.

    Args:
        x: Input variable.
        output_filter: Total number of output maps; must be even.
        scope: Parameter scope name.
        test: When true, batch statistics are not recomputed.
        is_search: Forwarded as ``fix_parameters`` to batch normalization.
    """
    assert output_filter % 2 == 0
    half_filters = output_filter // 2
    x = F.relu(x)

    with nn.parameter_scope(scope):
        with nn.parameter_scope("conv_1"):
            conv_1 = PF.convolution(
                x, half_filters, (1, 1),
                pad=None, stride=(2, 2), with_bias=False)

        # Shift by one element so the second path samples other positions.
        shifted = F.pad(x, (0, 1, 0, 1), mode='constant')
        shifted = F.slice(shifted, (0, 0, 1, 1))
        with nn.parameter_scope("conv_2"):
            conv_2 = PF.convolution(
                shifted, half_filters, (1, 1),
                pad=None, stride=(2, 2), with_bias=False)

        final_conv = F.concatenate(conv_1, conv_2, axis=1)

        with nn.parameter_scope("reduction_bn"):
            final_conv = PF.batch_normalization(
                final_conv, batch_stat=not test, fix_parameters=is_search)
    return final_conv
def call(self, x, p):
    r"""Run the period discriminator and collect its feature maps.

    The signal is reflect-padded at the end so its length is a multiple
    of ``p``, reshaped to (B, C, L // p, p), and passed through five
    weight-normalized convolutions plus a final one-map convolution. The
    output of every stage (after leaky ReLU) is recorded.

    Args:
        x (nn.Variable): Input variable of shape (B, 1, L).
        p (int): Period.

    Returns:
        List[nn.Variable]: List of feature maps.
    """
    results = list()
    batch, chans, length = x.shape

    remainder = length % p
    if remainder:
        x = F.pad(x, (0, 0, 0, p - remainder), 'reflect')
        length = x.shape[-1]
    x = F.reshape(x, (batch, chans, length // p, p))

    out_maps_per_layer = [32, 128, 512, 1024, 1024]
    for i, out_maps in enumerate(out_maps_per_layer):
        # Every layer but the last (index 4) strides by 3 along axis 2.
        stride = (3, 1) if i < 4 else (1, 1)
        with nn.parameter_scope(f"conv_{i}"):
            x = wn_conv(x, out_maps, (5, 1), stride=stride, pad=(2, 0))
            x = F.leaky_relu(x, 0.1)
            results.append(x)

    with nn.parameter_scope("post_conv"):
        x = wn_conv(x, 1, (3, 1), pad=(1, 0))
        x = F.leaky_relu(x, 0.1)
        results.append(x)

    return results
def decode(input_feature, output_nc, n_downsampling, ngf, norm_layer,
           use_bias):
    """Decode a feature map with stride-2 deconvolutions and a 7x7 conv.

    Args:
        input_feature: Input feature variable.
        output_nc: Number of output maps of the final convolution.
        n_downsampling: Number of deconvolution stages.
        ngf: Base number of filters; stage ``i`` outputs
            ``int(ngf * 2**(n_downsampling - i) / 2)`` maps.
        norm_layer: Callable applied after each deconvolution.
        use_bias: Whether the (de)convolutions have a bias term.

    Returns:
        The tanh-activated output.
    """
    w_init = I.NormalInitializer(sigma=0.02, rng=None)
    h = input_feature

    for i in range(n_downsampling):
        out_maps = int(ngf * 2**(n_downsampling - i) / 2)
        with nn.parameter_scope("dec_downsampling_{}".format(i)):
            # kernel changed 3 -> 4 to make the output fit to the
            # desired size.
            h = PF.deconvolution(
                h, out_maps,
                kernel=(4, 4), stride=(2, 2), pad=(1, 1),
                w_init=w_init, with_bias=use_bias)
            h = F.relu(norm_layer(h))

    h = F.pad(h, (3, 3, 3, 3), 'reflect')
    h = PF.convolution(
        h, output_nc, kernel=(7, 7),
        w_init=w_init, with_bias=use_bias, name="dec_last_conv")
    return F.tanh(h)
def factorized_reduction(x, output_filter, scope, test):
    """Apply spatial reduction to the input variable.

    The input goes through two skip paths:
      * path 1: 1x1 average pooling with stride 2, then a 1x1 convolution;
      * path 2: the input is zero-padded at the end of its last two axes
        and sliced from index 1 (a one-element shift that keeps the
        shape), then treated like path 1.
    Each path yields ``output_filter // 2`` maps. Both are concatenated
    along axis 1 and batch-normalized.
    """
    half_filters = output_filter // 2

    def _subsample(v):
        # A 1x1 pooling window with stride 2 simply picks every other element.
        return F.average_pooling(v, (1, 1), (2, 2))

    with nn.parameter_scope(scope):
        path1 = _subsample(x)
        with nn.parameter_scope("path1_conv"):
            path1 = PF.convolution(
                path1, half_filters, (1, 1), with_bias=False)

        shifted = F.slice(
            F.pad(x, (0, 1, 0, 1), mode='constant'), (0, 0, 1, 1))
        path2 = _subsample(shifted)
        with nn.parameter_scope("path2_conv"):
            path2 = PF.convolution(
                path2, half_filters, (1, 1), with_bias=False)

        final_path = F.concatenate(path1, path2, axis=1)
        with nn.parameter_scope("reduction_bn"):
            final_path = PF.batch_normalization(
                final_path, batch_stat=not test)
    return final_path
def convolution(x, maps, kernel=(3, 3), pad=(0, 0, 0, 0), stride=(1, 1),
                pad_mode="reflect", name="conv"):
    """Convolution wrapper that pads explicitly before convolving.

    Args:
        x: Input variable; ``x.shape[1]`` is used as the input map count
            for the weight initializer.
        maps: Number of output maps.
        kernel: Kernel shape, or a single int used for both axes.
        pad: Four pad widths handed to ``F.pad``, or a single int used
            for all four.
        stride: Stride, or a single int used for both axes.
        pad_mode: Padding mode passed to ``F.pad``.
        name: Parameter name of the convolution.

    Returns:
        The convolution output (with bias).
    """
    # isinstance is the idiomatic check and also accepts int subclasses.
    if isinstance(kernel, int):
        kernel = (kernel,) * 2
    if isinstance(pad, int):
        pad = (pad,) * 4
    if isinstance(stride, int):
        stride = (stride,) * 2

    h = x
    s = nn.initializer.calc_normal_std_he_backward(
        h.shape[1], maps, kernel=kernel)
    init = nn.initializer.NormalInitializer(s)
    h = F.pad(h, pad, mode=pad_mode)
    h = PF.convolution(
        h, maps, kernel, stride=stride, with_bias=True,
        w_init=init, name=name)
    return h
def main():
    """Inference function to generate SR images.

    Loads the parameters given by ``args.model``, builds the generator
    and flow-estimation graphs for a single frame, then processes the
    low-resolution frames in order. Each frame's output is fed back as
    the previous frame for the next step. The first five frames are
    treated as warm-up and not saved.
    """
    nn.load_parameters(args.model)

    # Inference data loader
    inference_data = inference_data_loader(args.input_dir_lr)
    input_shape = [1, ] + list(inference_data.inputs[0].shape)
    output_shape = [1, input_shape[1] * 4, input_shape[2] * 4, 3]
    # Remainders of height/width modulo 8; used to pad the estimated flow.
    oh = input_shape[1] - input_shape[1] // 8 * 8
    ow = input_shape[2] - input_shape[2] // 8 * 8

    # Build the computation graph
    inputs_raw = nn.Variable(input_shape)
    pre_inputs = nn.Variable(input_shape)
    pre_gen = nn.Variable(output_shape)
    pre_warp = nn.Variable(output_shape)

    transposed_pre_warp = space_to_depth(pre_warp)
    inputs_all = F.concatenate(inputs_raw, transposed_pre_warp)
    with nn.parameter_scope("generator"):
        gen_output = generator(inputs_all, 3, args.num_resblock)
    outputs = (gen_output + 1) / 2

    inputs_frames = F.concatenate(pre_inputs, inputs_raw)
    with nn.parameter_scope("fnet"):
        flow_lr = flow_estimator(inputs_frames)
    flow_lr = F.pad(flow_lr, (0, 0, 0, oh, 0, ow, 0, 0), "reflect")
    flow_hr = upscale_four(flow_lr * 4.0)
    pre_gen_warp = warp_by_flow(pre_gen, flow_hr)

    # exist_ok avoids the race between checking for the directory and
    # creating it.
    os.makedirs(args.output_dir, exist_ok=True)

    max_iter = len(inference_data.inputs)
    print('Frame evaluation starts!!')
    pre_inputs.d, pre_gen.d, pre_warp.d = 0, 0, 0
    for i in range(max_iter):
        inputs_raw.d = np.array(
            [inference_data.inputs[i]]).astype(np.float32)
        if i != 0:
            # Warp the previous output towards the current frame.
            pre_gen_warp.forward()
            pre_warp.data.copy_from(pre_gen_warp.data)
        outputs.forward()
        output_frame = outputs.d

        if i >= 5:
            name, _ = os.path.splitext(
                os.path.basename(str(inference_data.paths_lr[i])))
            filename = args.output_name + '_' + name
            print('saving image %s' % filename)
            out_path = os.path.join(
                args.output_dir, "%s.%s" % (filename, args.output_ext))
            save_img(out_path, output_frame[0])
        else:
            # First 5 is a hard-coded symmetric frame padding,
            # ignored but time added!
            print("Warming up %d" % (5 - i))

        # Keep the current frame and output for the next iteration.
        pre_inputs.data.copy_from(inputs_raw.data)
        pre_gen.data.copy_from(outputs.data)
def layer3_1(x):
    """Run two pad/conv/instance-norm/PReLU stages.

    Each stage reflect-pads the last two axes by one element, applies a
    3x3 stride-1 convolution (128 then 64 output maps), instance
    normalization without affine parameters over ``channel_axis=1`` and
    a PReLU.
    """
    # (output maps, convolution parameter name, PReLU parameter name)
    stages = (
        (128, 'layer3_1.1', 'layer3_1.3'),
        (64, 'layer3_1.5', 'layer3_1.7'),
    )
    h = x
    for out_maps, conv_name, prelu_name in stages:
        h = F.pad(h, (1, 1, 1, 1), 'reflect')
        h = PF.convolution(
            h, out_maps, kernel=(3, 3), stride=(1, 1), name=conv_name)
        h = F.instance_normalization(
            h, gamma=None, beta=None, channel_axis=1)
        h = PF.prelu(h, name=prelu_name)
    return h
def construct_networks(args, ops, arch_dict, image, test):
    """Construct a network by stacking cells.

    Args:
        args: Arguments set by the user. ``num_cells``, ``output_filter``,
            ``additional_filters_on_retrain`` and ``auxiliary`` are read.
        ops: Operations used in the network.
        arch_dict: A dictionary containing architecture information.
        image: Variable. Input images.
        test: bool. True if the network is for validation.

    Returns:
        ``(pred, aux_logits)``; ``aux_logits`` stays ``None`` unless the
        auxiliary head is built.
    """
    num_of_cells = args.num_cells
    initial_output_filter = (
        args.output_filter + args.additional_filters_on_retrain)
    num_class = 10
    aux_logits = None

    if not test:
        # Training-time augmentation: pad-and-crop plus horizontal flip.
        padded = F.pad(image, (4, 4, 4, 4))
        image = F.random_crop(padded, shape=(image.shape))
        image = F.image_augmentation(image, flip_lr=True)
        image.need_grad = False
    x = image

    with nn.parameter_scope("stem_conv1"):
        stem_1 = PF.convolution(
            x, initial_output_filter, (3, 3), (1, 1), with_bias=False)
        stem_1 = PF.batch_normalization(stem_1, batch_stat=not test)

    cell_prev, cell_prev_prev = stem_1, stem_1
    output_filter = initial_output_filter
    is_reduced_curr, is_reduced_prev = False, False

    # Cells at one third and two thirds of the depth double the filters.
    first_reduction = num_of_cells // 3
    second_reduction = 2 * num_of_cells // 3

    for i in range(num_of_cells):
        is_reduced_curr = i in [first_reduction, second_reduction]
        if is_reduced_curr:
            output_filter = 2 * output_filter

        y, is_reduced_curr, is_reduced_prev, output_filter = \
            constructing_learned_cell(
                args, ops, arch_dict, i, cell_prev_prev, cell_prev,
                output_filter, is_reduced_curr, is_reduced_prev, test)

        if i == second_reduction and args.auxiliary and not test:
            print("Using Aux Tower after cell_{}".format(i))
            aux_logits = construct_aux_head(y, num_class)

        cell_prev, cell_prev_prev = y, cell_prev  # shifting

    # Pooling over the full trailing extent works as global average pooling.
    y = F.average_pooling(y, y.shape[2:])

    with nn.parameter_scope("fc"):
        pred = PF.affine(y, num_class, with_bias=True)

    return pred, aux_logits
def resblock(x, n=256, test=False, norm_type="batch_norm"):
    """Residual block of two reflect-padded 3x3 convolutions.

    Args:
        x: Input variable; it is added to the branch output.
        n: Number of output maps of both convolutions.
        test: When true, batch normalization does not use batch statistics.
        norm_type: ``"instance_norm"`` selects instance normalization;
            any other value selects batch normalization.

    Returns:
        ``x`` plus the branch output.
    """
    def _pad_conv_norm(h, scope):
        h = F.pad(h, (1, 1, 1, 1), 'reflect')
        with nn.parameter_scope(scope):
            h = PF.convolution(h, n, (3, 3), with_bias=False)
            if norm_type == "instance_norm":
                h = PF.instance_normalization(h, eps=1e-05)
            else:
                h = PF.batch_normalization(h, batch_stat=not test)
        return h

    # Only the first stage is followed by a ReLU.
    branch = F.relu(_pad_conv_norm(x, 'block1'))
    branch = _pad_conv_norm(branch, 'block2')
    return x + branch
def resnetblock(x, dim, padding_type, norm_layer, use_dropout, use_bias):
    """Residual block with two 3x3 convolutions.

    Args:
        x: Input variable; ``x.shape[1]`` must equal ``dim``.
        dim: Number of input and output maps.
        padding_type: ``'reflect'`` pads explicitly before each
            convolution; ``'zero'`` uses the convolution's own padding.
        norm_layer: Callable ``norm_layer(h, name=...)``.
        use_dropout: Whether to apply dropout (0.5) after the first ReLU.
        use_bias: Whether the convolutions have a bias term.

    Returns:
        ``x`` plus the branch output.

    Raises:
        NotImplementedError: For any other ``padding_type``.
    """
    assert dim == x.shape[
        1], "The number of input / output channels must match."

    if padding_type == 'reflect':
        reflect, conv_pad = True, 0
    elif padding_type == 'zero':
        reflect, conv_pad = False, 1
    else:
        raise NotImplementedError(
            'padding {} is not implemented'.format(padding_type))

    w_init = I.NormalInitializer(sigma=0.02, rng=None)

    def _conv_norm(h, name):
        if reflect:
            h = F.pad(h, (1, 1, 1, 1), 'reflect')
        h = PF.convolution(
            h, dim, kernel=(3, 3), pad=(conv_pad, conv_pad),
            w_init=w_init, with_bias=use_bias, name=name)
        return norm_layer(h, name=name)

    h = F.relu(_conv_norm(x, "1st"))
    if use_dropout:
        h = F.dropout(h, 0.5)
    h = _conv_norm(h, "2nd")

    return F.add2(x, h)
def res_block(x, out_ch, name):
    """Residual block: two padded 3x3 conv + instance-norm stages.

    The first stage is followed by a PReLU; the input is added to the
    output of the second stage and a PReLU is applied to the sum. Both
    PReLU calls are unnamed inside the same parameter scope.

    Args:
        x: Input variable (also used as the residual).
        out_ch: Number of output maps of both convolutions.
        name: Parameter scope name.
    """
    def _pad_conv_norm(h, conv_name):
        h = F.pad(h, (1, 1, 1, 1), 'reflect')
        h = PF.convolution(
            h, out_ch, kernel=(3, 3), stride=(1, 1), name=conv_name)
        return F.instance_normalization(
            h, gamma=None, beta=None, channel_axis=1)

    with nn.parameter_scope(name):
        residual = x
        out = PF.prelu(_pad_conv_norm(x, 'conv1'))
        out = _pad_conv_norm(out, 'conv2')
        out += residual
        out = PF.prelu(out)
    return out
def encdec(self, x, n_downsamples):
    """Encoder-decoder: 7x7 conv, strided convs, deconvs, 7x7 conv + tanh.

    Args:
        x: Input variable.
        n_downsamples: Number of stride-2 down-sampling convolutions and
            of matching stride-2 up-sampling deconvolutions.

    Returns:
        The tanh-activated 3-map output.
    """
    with nn.parameter_scope("first layer"):
        pad_width = get_symmetric_padwidth(
            3, channel_last=self.channel_last)
        h = F.pad(x, pad_width=pad_width, mode=self.padding_type)
        h = PF.convolution(h, 32, (7, 7), **self.conv_opts)
        h = self.instance_norm_relu(h)

    # down sample layers
    for i in range(n_downsamples):
        with nn.parameter_scope("down_{}".format(i)):
            c = 32 * 2**(i + 1)
            # The keyword is `stride`; the original `strides` is not a
            # parameter of PF.convolution.
            h = PF.convolution(h, c, (3, 3), stride=(2, 2), pad=(1, 1),
                               **self.conv_opts)
            h = self.instance_norm_relu(h)

    # up sample layers
    for i in range(n_downsamples):
        with nn.parameter_scope("up_{}".format(i)):
            c = 32 * 2**(n_downsamples - i - 1)
            h = PF.deconvolution(h, c, (3, 3), stride=(2, 2), pad=(1, 1),
                                 **self.conv_opts)
            h = F.pad(h, pad_width=(0, 1, 0, 1))  # output padding
            h = self.instance_norm_relu(h)

    with nn.parameter_scope("last layer"):
        pad_width = get_symmetric_padwidth(
            3, channel_last=self.channel_last)
        h = F.pad(h, pad_width=pad_width, mode=self.padding_type)
        h = PF.convolution(h, 3, (7, 7), **self.conv_opts)
        h = F.tanh(h)

    return h
def __init__(self, x, weight, bias, beta, gamma, rmean, rvar, z, base_axis,
             pad, stride, dilation, group, channel_last, decay_rate, eps,
             batch_stat, nonlinearity, nonlinearity_args, pad_mode,
             constant_value):
    """Build a reference graph: pad, convolution, optional BN/add, activation.

    Array arguments are wrapped in ``nn.Variable``; the ones that are
    present are recorded in ``self.input_dict`` (an ``OrderedDict``) and
    the final variable in ``self.output``.

    Args:
        x, weight: Arrays for the convolution input and weights.
        bias: Optional bias array.
        beta, gamma, rmean, rvar: Batch-normalization arrays; batch
            normalization is applied only when ``beta`` is not ``None``.
        z: Optional array added to the result before the activation.
        base_axis: Base axis of the convolution.
        pad: Either one width per spatial dimension (applied on both
            sides) or explicit ``(before, after)`` pairs per dimension.
        stride, dilation, group, channel_last: Convolution arguments.
        decay_rate, eps, batch_stat: Batch-normalization arguments.
        nonlinearity, nonlinearity_args: Passed to ``ref_activation``.
        pad_mode, constant_value: Passed to ``F.pad``.
    """
    from collections import OrderedDict

    inputs = OrderedDict()
    xvar = nn.Variable.from_numpy_array(x)
    weightvar = nn.Variable.from_numpy_array(weight)
    inputs['x'] = xvar
    inputs['weight'] = weightvar

    biasvar = None
    betavar = None
    gammavar = None
    rmeanvar = None
    rvarvar = None
    zvar = None
    if bias is not None:
        biasvar = nn.Variable.from_numpy_array(bias)
        inputs['bias'] = biasvar
    if beta is not None:
        betavar = nn.Variable.from_numpy_array(beta)
        gammavar = nn.Variable.from_numpy_array(gamma)
        rmeanvar = nn.Variable.from_numpy_array(rmean)
        rvarvar = nn.Variable.from_numpy_array(rvar)
        inputs['beta'] = betavar
        inputs['gamma'] = gammavar
        inputs['rmean'] = rmeanvar
        inputs['rvar'] = rvarvar
    if z is not None:
        zvar = nn.Variable.from_numpy_array(z)
        inputs['z'] = zvar

    spatial_dims = xvar.ndim - (base_axis + 1)
    assert (len(pad) == spatial_dims or len(pad) == 2 * spatial_dims)
    if len(pad) == spatial_dims:
        # F.pad takes (before, after) pairs per axis, so each width must
        # be repeated in place: (p0, p0, p1, p1), not (p0, p1, p0, p1).
        pad_width = tuple(p for p in pad for _ in range(2))
    else:  # len(pad) == 2 * spatial_dims
        pad_width = pad

    h = F.pad(xvar, pad_width, pad_mode, constant_value)
    # Padding was applied explicitly, so the convolution itself pads nothing.
    conv_pad = (0,) * spatial_dims
    h = F.convolution(h, weightvar, biasvar, base_axis, conv_pad, stride,
                      dilation, group, channel_last)
    if beta is not None:
        h = F.batch_normalization(
            h, betavar, gammavar, rmeanvar, rvarvar,
            [h.ndim - 1 if channel_last else base_axis],
            decay_rate, eps, batch_stat)
    if z is not None:
        h = F.add2(h, zvar)
    h = ref_activation(h, nonlinearity, nonlinearity_args)

    self.input_dict = inputs
    self.output = h
def call(self, x):
    """Encode ``x`` into a content embedding with unit L2 norm over axis 1.

    A reflect-padded kernel-7 convolution is followed by the ``block_i``
    sub-modules (one per entry of ``hp.ratios``, in reverse order) and
    two further GELU + padded kernel-7 convolutions. The last one
    projects to ``hp.bottleneck_dim`` maps without bias.
    """
    hp = self.hp

    def _pad3(v):
        # Three elements on each side of the last axis keep the length
        # unchanged under a kernel of size 7.
        return F.pad(v, (0, 0, 3, 3), 'reflect')

    with nn.parameter_scope("first_layer"):
        x = wn_conv(_pad3(x), hp.ngf, (7,))

    for i, ratio in enumerate(reversed(hp.ratios)):
        block = getattr(self, f"block_{i}")
        x = block(x, ratio, 2**(i + 1))

    with nn.parameter_scope("last_layer"):
        x = _pad3(F.gelu(x))
        x = wn_conv(x, x.shape[1], (7,))

    with nn.parameter_scope("content"):
        x = _pad3(F.gelu(x))
        x = wn_conv(x, hp.bottleneck_dim, (7,), with_bias=False)

    # Normalize along axis 1; the epsilon avoids division by zero.
    norm = F.sum(x**2 + 1e-12, axis=1, keepdims=True)**0.5
    return x / norm
def build_cost_volume(limg, rimg, maxdisp):
    """Stack shifted left/right feature maps into a cost volume.

    For every disparity ``i`` in ``range(int(maxdisp / 4))`` the left
    map drops its first ``i`` columns, the right map drops its last
    ``i`` columns, and both are zero-padded by ``i`` on the left so the
    width is restored.

    Args:
        limg: Left feature map, [B, C, H, W].
        rimg: Right feature map, [B, C, H, W].
        maxdisp: Maximum disparity; a quarter of it is used.

    Returns:
        Cost volume of shape [B, 2C, D, H, W].
    """
    width = limg.shape[3]
    left_stack = []
    right_stack = []

    for disp in range(int(maxdisp / 4)):
        left = limg[:, :, :, disp:]
        right = rimg[:, :, :, :width - disp]
        if disp != 0:
            # Pad `disp` pixels on the left edge so the shape is
            # [B, C, H, W] again.
            left = F.pad(left, (disp, 0))
            right = F.pad(right, (disp, 0))
        left_stack.append(left)
        right_stack.append(right)

    left_stacked = F.stack(*left_stack, axis=2)    # [B, C, D, H, W]
    right_stacked = F.stack(*right_stack, axis=2)  # [B, C, D, H, W]

    # [B, 2C, D, H, W]
    return F.concatenate(left_stacked, right_stacked, axis=1)
def call(self, x, spk_emb, dilation):
    """Gated residual block with an optional speaker-embedding term.

    Args:
        x: Input variable; ``x.shape[1]`` is the channel count.
        spk_emb: Speaker embedding, or ``None`` to skip conditioning.
        dilation: Dilation of the kernel-3 convolution; also the
            reflect-padding width on each side of the last axis.

    Returns:
        Sum of the 1x1 shortcut and the gated branch.
    """
    channels = x.shape[1]

    with nn.parameter_scope('shortcut'):
        shortcut = wn_conv(x, channels, (1, ))

    with nn.parameter_scope('block'):
        gated = F.pad(x, (0, 0, dilation, dilation), 'reflect')
        gated = wn_conv(gated, 2 * channels, (3, ),
                        dilation=(dilation, ), name='conv_1')
        if spk_emb is not None:
            gated = gated + wn_conv(
                spk_emb, 2 * channels, (1, ), name="spk_emb")
        # First half of the maps goes through tanh, second half gates it.
        signal = F.tanh(gated[:, :channels, ...])
        gate = F.sigmoid(gated[:, channels:, ...])
        gated = wn_conv(signal * gate, channels, (1, ),
                        dilation=(dilation, ), name='conv_2')

    return shortcut + gated
def pad_data_grad_backward(inputs, pad_width, mode='constant',
                           constant_value=0):
    """Return the gradient w.r.t. the input of the pad data-grad function.

    Args:
        inputs (list of nn.Variable): Incoming grads/inputs to/of the
            forward function; only ``inputs[0]`` is used.
        pad_width: Pad widths, forwarded to ``F.pad``.
        mode (str): Padding mode; only ``'constant'`` is supported.
        constant_value: Accepted for signature compatibility but not
            used; the gradient is always padded with 0.

    Returns:
        nn.Variable: ``inputs[0]`` padded with zeros.

    Raises:
        NotImplementedError: If ``mode`` is not ``'constant'``.
    """
    if mode != "constant":
        # The original message was built from an undefined `func`
        # variable, which raised NameError instead of this exception.
        raise NotImplementedError(
            "pad_data_grad_backward (mode!=constant) is not implemented.")
    gdx = inputs[0]
    gdy = F.pad(gdx, pad_width, mode, constant_value=0)
    return gdy
def call(self, x, spk_emb):
    """Decode ``x`` into a tanh-bounded single-channel output.

    Sets ``self.hop_length`` to the product of ``hp.ratios``. Two
    reflect-padded kernel-7 convolutions are followed by the ``block_i``
    sub-modules (one per ratio, each given ``spk_emb``) and a final
    GELU + padded kernel-7 convolution to one channel.
    """
    hp = self.hp
    self.hop_length = np.prod(hp.ratios)
    mult = int(2 ** len(hp.ratios))

    def _pad3(v):
        # Three elements on each side of the last axis keep the length
        # unchanged under a kernel of size 7.
        return F.pad(v, (0, 0, 3, 3), 'reflect')

    with nn.parameter_scope("upsample"):
        x = wn_conv(_pad3(x), mult * hp.ngf, (7,))

    with nn.parameter_scope("first_layer"):
        x = _pad3(F.gelu(x))
        x = wn_conv(x, x.shape[1], (7,))

    for i, ratio in enumerate(hp.ratios):
        block = getattr(self, f"block_{i}")
        x = block(x, spk_emb, ratio, mult // (2**i))

    with nn.parameter_scope("waveform"):
        x = _pad3(F.gelu(x))
        x = wn_conv(x, 1, (7,))
        x = F.tanh(x)

    return x
def stft(x, window_size, stride, fft_size, window_type='hanning',
         center=True, pad_mode='reflect'):
    """Compute a short-time Fourier transform with two strided convolutions.

    Args:
        x: Input variable; it is padded along its last axis and passed to
            ``F.convolution`` with filters of shape
            ``(fft_size // 2 + 1, 1, fft_size)``.
        window_size (int): Length of the analysis window.
        stride (int): Hop size between frames.
        fft_size (int): FFT size; must be at least ``window_size``.
        window_type (str or None): ``'hanning'``, ``'hamming'``, or
            ``'rectangular'`` / ``None``.
        center (bool): If true, pad ``(fft_size - stride) // 2`` samples
            on both sides before framing.
        pad_mode (str): Padding mode used when ``center`` is true.

    Returns:
        tuple: ``(y_r, y_i)``, the real and imaginary parts.

    Raises:
        ValueError: For an unknown window type or when
            ``fft_size < window_size``.
    """
    # The extra sample that is dropped makes the windows periodic.
    if window_type == 'hanning':
        window_func = np.hanning(window_size + 1)[:-1]
    elif window_type == 'hamming':
        window_func = np.hamming(window_size + 1)[:-1]
    elif window_type == 'rectangular' or window_type is None:
        window_func = np.ones(window_size)
    else:
        raise ValueError("Unknown window type {}.".format(window_type))

    # pad window if `fft_size > window_size`
    if fft_size > window_size:
        diff = fft_size - window_size
        window_func = np.pad(
            window_func, (diff // 2, diff - diff // 2), mode='constant')
    elif fft_size < window_size:
        raise ValueError(
            "FFT size has to be as least as large as window size.")

    # compute STFT filter coefficients in one vectorized step instead of
    # a Python double loop over (frequency, time)
    freqs = np.arange(fft_size // 2 + 1)
    times = np.arange(fft_size)
    phase = (2. * np.pi * freqs[:, np.newaxis] * times[np.newaxis, :]
             / fft_size)
    mat_r = np.cos(phase)[:, np.newaxis, :]
    mat_i = -np.sin(phase)[:, np.newaxis, :]

    conv_r = nn.Variable.from_numpy_array(mat_r * window_func)
    conv_i = nn.Variable.from_numpy_array(mat_i * window_func)

    if center:
        # pad at begin/end (per default this is a reflection padding)
        p = (fft_size - stride) // 2
        x = F.pad(x, (p, p), mode=pad_mode)

    # compute STFT
    y_r = F.convolution(x, conv_r, stride=(stride, ))
    y_i = F.convolution(x, conv_i, stride=(stride, ))
    return y_r, y_i
def shift(x, ksize=3):
    """Shift channel groups of ``x`` by one element in eight directions.

    The channels are split into groups of ``maps // ksize**2``. After
    zero-padding the last two axes by one, the first eight groups are
    each read from a different one-element-shifted window. All remaining
    channels are read from the centred (unshifted) window. The groups
    are concatenated back along axis 1.
    """
    maps = x.shape[1]
    cpg = maps // (ksize**2)  # channels per group

    x_pad = F.pad(x, (1, 1, 1, 1))
    b, c, h, w = x_pad.shape

    # Three possible windows along each padded axis.
    rows_lo, rows_mid, rows_hi = slice(None, h - 2), slice(1, h - 1), slice(2, None)
    cols_lo, cols_mid, cols_hi = slice(None, w - 2), slice(1, w - 1), slice(2, None)

    # One (rows, cols) window per group, in the original group order.
    windows = [
        (rows_lo, cols_mid),   # Bottom shift
        (rows_hi, cols_mid),   # Top shift
        (rows_mid, cols_lo),   # Right shift
        (rows_mid, cols_hi),   # Left shift
        (rows_lo, cols_lo),    # Bottom Right shift
        (rows_lo, cols_hi),    # Bottom Left shift
        (rows_hi, cols_lo),    # Top Right shift
        (rows_hi, cols_hi),    # Top Left shift
    ]

    xs = [
        x_pad[:, i * cpg:(i + 1) * cpg, rows, cols]
        for i, (rows, cols) in enumerate(windows)
    ]
    # Remaining channels are left unshifted.
    xs.append(x_pad[:, 8 * cpg:, 1:h - 1, 1:w - 1])

    return F.concatenate(*xs, axis=1)