def conv_block(inp, layer_name, bn_batch_stat, activation, args, init_params):
    """ Perform conv, batch norm, nonlinearity, and max pool """
    k = 3
    stride, no_stride = (2, 2), (1, 1)
    pad = (1, 1)
    if init_params is None or layer_name + '/conv/W' not in init_params:
        if args.max_pool:
            conv_output = PF.convolution(
                inp, args.num_filters, (k, k), pad=pad, stride=no_stride,
                name=layer_name)
        else:
            conv_output = PF.convolution(
                inp, args.num_filters, (k, k), pad=pad, stride=stride,
                name=layer_name)
        normed = normalize(conv_output, layer_name, bn_batch_stat, activation,
                           args, init_params)
    else:
        if args.max_pool:
            conv_output = F.convolution(
                inp, init_params[layer_name + '/conv/W'],
                init_params[layer_name + '/conv/b'], pad=pad, stride=no_stride)
        else:
            conv_output = F.convolution(
                inp, init_params[layer_name + '/conv/W'],
                init_params[layer_name + '/conv/b'], pad=pad, stride=stride)
        normed = normalize(conv_output, layer_name, bn_batch_stat, activation,
                           args, init_params)
    if args.max_pool:
        normed = F.max_pooling(normed, stride, stride=stride)
    return normed
def stft(x, window_size, stride, fft_size, window_type='hanning',
         center=True, pad_mode='reflect'):
    if window_type == 'hanning':
        window_func = np.hanning(window_size + 1)[:-1]
    elif window_type == 'hamming':
        window_func = np.hamming(window_size + 1)[:-1]
    elif window_type == 'rectangular' or window_type is None:
        window_func = np.ones(window_size)
    else:
        raise ValueError("Unknown window type {}.".format(window_type))

    # pad window if `fft_size > window_size`
    if fft_size > window_size:
        diff = fft_size - window_size
        window_func = np.pad(
            window_func, (diff // 2, diff - diff // 2), mode='constant')
    elif fft_size < window_size:
        raise ValueError(
            "FFT size has to be at least as large as window size.")

    # compute STFT filter coefficients
    mat_r = np.zeros((fft_size // 2 + 1, 1, fft_size))
    mat_i = np.zeros((fft_size // 2 + 1, 1, fft_size))

    for w in range(fft_size // 2 + 1):
        for t in range(fft_size):
            mat_r[w, 0, t] = np.cos(2. * np.pi * w * t / fft_size)
            mat_i[w, 0, t] = -np.sin(2. * np.pi * w * t / fft_size)

    conv_r = nn.Variable.from_numpy_array(mat_r * window_func)
    conv_i = nn.Variable.from_numpy_array(mat_i * window_func)

    if center:
        # pad at begin/end (per default this is a reflection padding)
        p = (fft_size - stride) // 2
        x = F.pad(x, (p, p), mode=pad_mode)

    # compute STFT
    y_r = F.convolution(x, conv_r, stride=(stride, ))
    y_i = F.convolution(x, conv_i, stride=(stride, ))

    return y_r, y_i
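
# Minimal usage sketch for the stft() helper above (assumes the usual
# `import nnabla as nn`, `import nnabla.functions as F`, `import numpy as np`).
# The input layout (batch, 1, samples) follows from the 1-D convolution
# weights built inside stft(); the signal and sizes below are illustrative.
signal = np.random.randn(1, 1, 16384).astype(np.float32)
x_sig = nn.Variable.from_numpy_array(signal)
y_r, y_i = stft(x_sig, window_size=512, stride=128, fft_size=512)
F.sink(y_r, y_i).forward()
# shape (batch, fft_size // 2 + 1, frames)
power_spectrum = y_r.d ** 2 + y_i.d ** 2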
def convolution_data_grad_backward(inputs, base_axis=1, pad=None, stride=None,
                                   dilation=None, group=1, channel_last=False):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    gdx = inputs[0]
    dy = inputs[1]
    w0 = inputs[2]
    ctx = nn.get_current_context()
    dfw = ConvolutionFilterGrad(ctx, base_axis, pad, stride,
                                dilation, group, channel_last)
    dfw.wshape = w0.shape

    gdy = F.convolution(gdx, w0, None, base_axis, pad, stride,
                        dilation, group, channel_last)
    gw0 = dfw(dy, gdx)
    return gdy, gw0
def dyn_2d_filter(x, lf_2d, k_sz):
    """
    Dynamic 2d filtering
    """
    with nn.parameter_scope('Dynamic_2D_Filtering'):
        f_localexpand = nn.Variable.from_numpy_array(
            np.eye(k_sz[0] * k_sz[1], k_sz[0] * k_sz[1]))
        f_localexpand = F.reshape(
            f_localexpand,
            (k_sz[0], k_sz[1], 1, k_sz[0] * k_sz[1]))  # (9, 9, 1, 81)
        f_localexpand = F.transpose(
            f_localexpand, (3, 0, 1, 2))  # (81, 9, 9, 1)
        x_sz = x.shape
        x = F.reshape(x, (x_sz[0], x_sz[1], x_sz[2], 1))  # (1, 100, 170, 1)
        x_localexpand = F.convolution(
            x, f_localexpand, stride=(1, 1), pad=(4, 4),
            channel_last=True)  # (1, 100, 170, 81)
        x_le_sz = x_localexpand.shape
        x_localexpand = F.reshape(
            x_localexpand,
            (x_le_sz[0], x_le_sz[1], x_le_sz[2], 1, x_le_sz[3]))
        y = F.batch_matmul(x_localexpand, lf_2d)
        y_sz = y.shape
        y = F.reshape(y, (y_sz[0], y_sz[1], y_sz[2], y_sz[4]))
    return y
def convolution(inp, outmaps, kernel, pad=None, stride=None, dilation=None,
                group=1, itr=1, w_init=None, b_init=None, base_axis=1,
                fix_parameters=False, rng=None, with_bias=True, sn=True,
                test=False, init_scale=1.0):
    """
    Convolution with spectral normalization applied to the weight.
    """
    if w_init is None:
        l, u = calc_uniform_lim_glorot(
            inp.shape[base_axis], outmaps, tuple(kernel))
        l, u = init_scale * l, init_scale * u
        w_init = UniformInitializer((l, u), rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", (outmaps, inp.shape[base_axis] // group) + tuple(kernel),
        w_init, not fix_parameters)
    w_sn = spectral_normalization_for_conv(w, itr=itr, test=test) if sn else w
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", (outmaps, ), b_init, not fix_parameters)
    return F.convolution(inp, w_sn, b, base_axis, pad, stride, dilation, group)
def ref_grad_binary_connect_convolution(x, w, wb, b, dy, base_axis, pad,
                                        stride, dilation, group,
                                        quantize_zero_to, **kw):
    # Set variables
    vx = nn.Variable(x.shape, need_grad=True)
    vx.d = x
    vx.grad.zero()
    vw = nn.Variable(w.shape, need_grad=True)
    vw.d = binarize(w, quantize_zero_to)
    vw.grad.zero()
    vb = None
    if b is not None:
        vb = nn.Variable(b.shape, need_grad=True)
        vb.d = b
        vb.grad.zero()

    # Execute binarized forward and back prop.
    with nn.auto_forward():
        vy = F.convolution(vx, vw, vb, base_axis, pad, stride, dilation, group)
    vy.backward(dy)

    # Return grads
    if b is None:
        return np.concatenate([vx.g.flat, vw.g.flat])
    return np.concatenate([vx.g.flat, vw.g.flat, vb.g.flat])
def vision_transformer(x, input_res, patch_size, v_width, v_layers, v_heads,
                       embed_dim):
    scale = v_width ** -0.5

    with nn.parameter_scope("visual"):
        con1_w = nn.parameter.get_parameter_or_create(
            name="conv1/W", shape=(v_width, 3, patch_size, patch_size))
        x = F.convolution(
            x, con1_w, bias=None,
            stride=(patch_size, patch_size))  # shape = [*, width, grid, grid]

        # shape = [*, width, grid ** 2]
        x = F.reshape(x, (x.shape[0], x.shape[1], -1))
        x = F.transpose(x, (0, 2, 1))  # shape = [*, grid ** 2, width]

        z = np.zeros((x.shape[0], 1, x.shape[-1]))
        zeros = nn.Variable.from_numpy_array(z)
        class_embed = nn.parameter.get_parameter_or_create(
            name="class_embedding", shape=(v_width, )).reshape(
                (x.shape[0], 1, v_width))

        # shape = [*, grid ** 2 + 1, width]
        x = F.concatenate(class_embed + zeros, x, axis=1)

        positional_embedding = nn.parameter.get_parameter_or_create(
            name='positional_embedding',
            shape=((input_res // patch_size) ** 2 + 1, v_width)).reshape(
                (x.shape[0], x.shape[1], v_width))
        x = x + positional_embedding

        ln_pre_w = nn.parameter.get_parameter_or_create(
            name="ln_pre/W", shape=(v_width, )).reshape((1, 1, v_width))
        ln_pre_b = nn.parameter.get_parameter_or_create(
            name="ln_pre/b", shape=(v_width, )).reshape((1, 1, v_width))
        x = F.layer_normalization(x, ln_pre_b, ln_pre_w, batch_axis=(0, 1))

        x = F.transpose(x, (1, 0, 2))  # NLD -> LND
        x = transformer(x, v_width, v_layers, v_heads)
        x = F.transpose(x, (1, 0, 2))  # LND -> NLD

        ln_post_w = nn.parameter.get_parameter_or_create(
            name="ln_post/W", shape=(v_width, )).reshape((1, 1, v_width))
        ln_post_b = nn.parameter.get_parameter_or_create(
            name="ln_post/b", shape=(v_width, )).reshape((1, 1, v_width))
        x = F.slice(x, stop=(x.shape[0], 1, x.shape[2]))
        x = F.layer_normalization(x, ln_post_b, ln_post_w)

        if 'proj' in nn.get_parameters():
            visual_proj = nn.parameter.get_parameter_or_create(
                name="proj", shape=(v_width, embed_dim)).reshape(
                    (1, v_width, -1))
            x = F.batch_matmul(x, visual_proj)

        x = x.reshape((-1, embed_dim))

    return x
def upfirdn_2d(x, k, upx=1, upy=1, downx=1, downy=1,
               padx0=0, padx1=0, pady0=0, pady1=0):

    assert isinstance(x, (nn.Variable, nn.NdArray))

    k = np.asarray(k, dtype=np.float32)
    assert x.ndim == 4
    inH = x.shape[1]
    inW = x.shape[2]
    minorDim = x.shape[3]
    kernelH, kernelW = k.shape
    assert inW >= 1 and inH >= 1
    assert kernelW >= 1 and kernelH >= 1
    assert isinstance(upx, int) and isinstance(upy, int)
    assert isinstance(downx, int) and isinstance(downy, int)
    assert isinstance(padx0, int) and isinstance(padx1, int)
    assert isinstance(pady0, int) and isinstance(pady1, int)

    # Upsample by inserting zeros between input samples.
    x = F.reshape(x, [-1, inH, 1, inW, 1, minorDim], inplace=False)
    x = F.pad(x, [0, 0, 0, 0, 0, upy - 1, 0, 0, 0, upx - 1, 0, 0])
    x = F.reshape(x, [-1, inH * upy, inW * upx, minorDim], inplace=False)

    # Pad (or crop, when the padding is negative).
    x = F.pad(x, [0, 0, max(pady0, 0), max(pady1, 0),
                  max(padx0, 0), max(padx1, 0), 0, 0])
    x = x[:, max(-pady0, 0):x.shape[1] - max(-pady1, 0),
          max(-padx0, 0):x.shape[2] - max(-padx1, 0), :]

    # Convolve with filter.
    x = F.transpose(x, [0, 3, 1, 2])
    x = F.reshape(
        x, [-1, 1, inH * upy + pady0 + pady1, inW * upx + padx0 + padx1],
        inplace=False)
    w = nn.Variable.from_numpy_array(k[np.newaxis, np.newaxis, ::-1, ::-1])
    x = F.convolution(x, w)
    x = F.reshape(x, [-1, minorDim,
                      inH * upy + pady0 + pady1 - kernelH + 1,
                      inW * upx + padx0 + padx1 - kernelW + 1], inplace=False)
    x = F.transpose(x, [0, 2, 3, 1])

    # Downsample by discarding pixels.
    if downx == 1 and downy == 1:
        return x
    return x[:, ::downy, ::downx, :]
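
# Hedged usage sketch for upfirdn_2d() above: 2x upsampling of an (N, H, W, C)
# batch with a 4-tap binomial FIR filter. The kernel normalization and padding
# recipe below follow the usual StyleGAN-style helpers and are assumptions,
# not part of the function itself.
k1d = np.array([1., 3., 3., 1.], dtype=np.float32)
k2d = np.outer(k1d, k1d)
k2d = k2d / k2d.sum() * (2 ** 2)       # gain of factor**2 keeps the mean level
p = k2d.shape[0] - 2                   # filter taps minus up-factor
pad0, pad1 = (p + 1) // 2 + 1, p // 2  # extra (factor - 1) on the leading side
img = nn.Variable.from_numpy_array(
    np.random.rand(1, 32, 32, 3).astype(np.float32))
up = upfirdn_2d(img, k2d, upx=2, upy=2,
                padx0=pad0, padx1=pad1, pady0=pad0, pady1=pad1)
up.forward()                           # up.shape == (1, 64, 64, 3)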
def connect(self, fname, inputs, args):
    if fname in ['Convolution', 'Deconvolution']:
        # TODO: address leading batch dimension
        args['channel_last'] = True
        x = inputs[0]
        w = inputs[1]
        b = inputs[2] if len(inputs) == 3 else None
        scope = self.get_parameter_scope(w)
        with nn.parameter_scope(scope):
            wd = w.d.copy().transpose(0, 2, 3, 1)
            w = nn.parameter.get_parameter_or_create('W_cl', wd.shape, wd)
            o = F.convolution(x, w, b, **args)
    elif fname == 'BatchNormalization':
        # TODO: address leading batch dimension
        x = inputs[0]
        beta = inputs[1]
        gamma = inputs[2]
        mean = inputs[3]
        var = inputs[4]
        args['axes'] = [len(x.shape) - 1]
        scope = self.get_parameter_scope(beta)
        with nn.parameter_scope(scope):
            beta_d = beta.d.copy().transpose(0, 2, 3, 1)
            gamma_d = gamma.d.copy().transpose(0, 2, 3, 1)
            mean_d = mean.d.copy().transpose(0, 2, 3, 1)
            var_d = var.d.copy().transpose(0, 2, 3, 1)
            beta = nn.parameter.get_parameter_or_create(
                'beta_cl', beta_d.shape, beta_d, beta.need_grad)
            gamma = nn.parameter.get_parameter_or_create(
                'gamma_cl', gamma_d.shape, gamma_d, gamma.need_grad)
            mean = nn.parameter.get_parameter_or_create(
                'mean_cl', mean_d.shape, mean_d, mean.need_grad)
            var = nn.parameter.get_parameter_or_create(
                'var_cl', var_d.shape, var_d, var.need_grad)
            o = F.batch_normalization(x, beta, gamma, mean, var, **args)
    elif fname in ['MaxPooling', 'AveragePooling', 'SumPooling']:
        args['channel_last'] = True
        o = self._call_function(fname, inputs, args)
    elif fname in ['Concatenate']:
        args['axis'] = len(inputs[0].shape) - 1
        o = self._call_function(fname, inputs, args)
    elif fname == 'Affine':
        x = inputs[0]
        _, h_s, w_s, c_s = inputs[0].shape
        _, b_s = inputs[1].shape
        wd = inputs[1].d.copy()
        wd = np.reshape(wd, (c_s, h_s, w_s, b_s))
        wd = np.transpose(wd, (1, 2, 0, 3))
        wd = np.reshape(wd, (-1, b_s))
        w = nn.parameter.get_parameter_or_create('w_cl', wd.shape, wd, False)
        b = inputs[2] if len(inputs) == 3 else None
        o = F.affine(x, w, b, **args)
    else:
        o = self._call_function(fname, inputs, args)
    return o
def conv(inp, outmaps, kernel, pad=None, stride=None, dilation=None, group=1,
         w_init=None, b_init=None, base_axis=1, fix_parameters=False,
         rng=None, with_bias=True, use_wscale=True, use_he_backward=False):
    """
    Convolution with He initialization and optional equalized learning rate
    (weight scaling).
    """
    # Use He backward
    if use_he_backward:
        std = calc_normal_std_he_backward(
            inp.shape[base_axis], outmaps, kernel=kernel)
    else:
        std = calc_normal_std_he_forward(
            inp.shape[base_axis], outmaps, kernel=kernel)

    # W init
    if w_init is None and use_wscale:
        # Equalized Learning Rate
        w_init = NormalInitializer(1.)
        w = get_parameter_or_create(
            "W", (outmaps, inp.shape[base_axis] // group) + tuple(kernel),
            w_init, not fix_parameters)
        w *= std
    elif w_init is None and not use_wscale:
        w_init = NormalInitializer(std)
        w = get_parameter_or_create(
            "W", (outmaps, inp.shape[base_axis] // group) + tuple(kernel),
            w_init, not fix_parameters)
    else:
        if w_init is None:
            w_init = UniformInitializer(
                calc_uniform_lim_glorot(inp.shape[base_axis], outmaps,
                                        tuple(kernel)), rng=rng)
        w = get_parameter_or_create(
            "W", (outmaps, inp.shape[base_axis] // group) + tuple(kernel),
            w_init, not fix_parameters)

    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", (outmaps, ), b_init, not fix_parameters)

    return F.convolution(inp, w, b, base_axis, pad, stride, dilation, group)
def conv_layer(conv_input, inmaps, outmaps, kernel_size, downsample=False,
               bias=True, act=F.leaky_relu, name_scope='Conv'):
    """
    Conv layer for the residual block of the discriminator
    """
    if downsample:
        k = [1, 3, 3, 1]
        out = downsample_2d(conv_input, k, factor=2, gain=1,
                            kernel_size=kernel_size)
        stride = 2
        pad = 0
    else:
        stride = 1
        pad = kernel_size // 2
        out = conv_input

    init_function = weight_init_fn(
        shape=(outmaps, inmaps, kernel_size, kernel_size), return_init=True)
    scale = 1 / np.sqrt(inmaps * kernel_size ** 2)
    conv_weight = nn.parameter.get_parameter_or_create(
        name=f'{name_scope}/W', initializer=init_function,
        shape=(outmaps, inmaps, kernel_size, kernel_size))
    if bias:
        conv_bias = nn.parameter.get_parameter_or_create(
            name=f'{name_scope}/b', shape=(outmaps, ))
    else:
        conv_bias = None

    out = F.convolution(out, conv_weight * scale, bias=conv_bias,
                        stride=(stride, stride), pad=(pad, pad))

    if act == F.leaky_relu:
        out = F.mul_scalar(F.leaky_relu(out, alpha=0.2, inplace=False),
                           np.sqrt(2), inplace=False)
    else:
        out = act(out)

    return out
def convolution(inp, outmaps, kernel, pad=None, stride=None, dilation=None,
                group=1, w_init=None, b_init=None, base_axis=1,
                fix_parameters=False, rng=None, with_bias=True):
    """
    N-D Convolution with a bias term.

    For Dilated Convolution (a.k.a. Atrous Convolution), refer to:

    - Chen et al., DeepLab: Semantic Image Segmentation with Deep
      Convolutional Nets, Atrous Convolution, and Fully Connected CRFs.
      https://arxiv.org/abs/1606.00915

    - Yu et al., Multi-Scale Context Aggregation by Dilated Convolutions.
      https://arxiv.org/abs/1511.07122

    Args:
        inp (~nnabla.Variable): N-D array.
        outmaps (int): Number of convolution kernels (which is equal to the
            number of output channels). For example, to apply convolution on
            an input with 16 types of filters, specify 16.
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel size. For
            example, to apply convolution on an image with a 3 (height) by 5
            (width) two-dimensional kernel, specify (3, 5).
        pad (:obj:`tuple` of :obj:`int`): Padding sizes for dimensions.
        stride (:obj:`tuple` of :obj:`int`): Stride sizes for dimensions.
        dilation (:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions.
        group (int): Number of groups of channels. This makes connections
            across channels more sparse by grouping connections along map
            direction.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        base_axis (int): Dimensions up to `base_axis` are treated as the
            sample dimensions.
        fix_parameters (bool): When set to `True`, the weights and biases will
            not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`: N-D array.

    """
    if w_init is None:
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(inp.shape[base_axis], outmaps,
                                    tuple(kernel)), rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", (outmaps, inp.shape[base_axis] // group) + tuple(kernel),
        w_init, not fix_parameters)
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", (outmaps, ), b_init, not fix_parameters)
    return F.convolution(inp, w, b, base_axis, pad, stride, dilation, group)
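
# Minimal usage sketch for the parametric convolution() helper above (assumes
# the usual nnabla imports); the scope name and layer sizes are illustrative.
x_in = nn.Variable((4, 3, 32, 32))       # (batch, channels, H, W)
with nn.parameter_scope('conv1'):
    h = convolution(x_in, outmaps=16, kernel=(3, 3), pad=(1, 1), stride=(1, 1))
# h.shape == (4, 16, 32, 32); the weights are registered under the scope,
# i.e. nn.get_parameters() now contains 'conv1/W' and 'conv1/b'.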
def __init__(self, x, weight, bias, beta, gamma, rmean, rvar, z,
             base_axis, pad, stride, dilation, group, channel_last,
             decay_rate, eps, batch_stat, nonlinearity, nonlinearity_args,
             pad_mode, constant_value):
    from collections import OrderedDict
    inputs = OrderedDict()

    xvar = nn.Variable.from_numpy_array(x)
    weightvar = nn.Variable.from_numpy_array(weight)
    inputs['x'] = xvar
    inputs['weight'] = weightvar

    biasvar = None
    betavar = None
    gammavar = None
    rmeanvar = None
    rvarvar = None
    zvar = None
    if bias is not None:
        biasvar = nn.Variable.from_numpy_array(bias)
        inputs['bias'] = biasvar
    if beta is not None:
        betavar = nn.Variable.from_numpy_array(beta)
        gammavar = nn.Variable.from_numpy_array(gamma)
        rmeanvar = nn.Variable.from_numpy_array(rmean)
        rvarvar = nn.Variable.from_numpy_array(rvar)
        inputs['beta'] = betavar
        inputs['gamma'] = gammavar
        inputs['rmean'] = rmeanvar
        inputs['rvar'] = rvarvar
    if z is not None:
        zvar = nn.Variable.from_numpy_array(z)
        inputs['z'] = zvar

    spatial_dims = xvar.ndim - (base_axis + 1)
    assert (len(pad) == spatial_dims or len(pad) == 2 * spatial_dims)
    if len(pad) == spatial_dims:
        pad_width = tuple(p for _ in range(2) for p in pad)
    else:  # if len(pad) == 2 * spatial_dims:
        pad_width = pad
    h = F.pad(xvar, pad_width, pad_mode, constant_value)

    conv_pad = (0,) * spatial_dims
    h = F.convolution(h, weightvar, biasvar, base_axis, conv_pad, stride,
                      dilation, group, channel_last)

    if beta is not None:
        h = F.batch_normalization(
            h, betavar, gammavar, rmeanvar, rvarvar,
            [h.ndim - 1 if channel_last else base_axis],
            decay_rate, eps, batch_stat)

    if z is not None:
        h = F.add2(h, zvar)

    h = ref_activation(h, nonlinearity, nonlinearity_args)

    self.input_dict = inputs
    self.output = h
def convolution(inp, outmaps, kernel, pad=None, stride=None, dilation=None,
                group=1, w_init=None, b_init=None, base_axis=1,
                fix_parameters=False, rng=None, with_bias=True):
    """
    N-D Convolution with a bias term.

    For Dilated Convolution (a.k.a. Atrous Convolution), refer to:

    - Chen et al., DeepLab: Semantic Image Segmentation with Deep
      Convolutional Nets, Atrous Convolution, and Fully Connected CRFs.
      https://arxiv.org/abs/1606.00915

    - Yu et al., Multi-Scale Context Aggregation by Dilated Convolutions.
      https://arxiv.org/abs/1511.07122

    Args:
        inp (~nnabla.Variable): N-D array.
        outmaps (int): Number of convolution kernels (which is equal to the
            number of output channels). For example, to apply convolution on
            an input with 16 types of filters, specify 16.
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel size. For
            example, to apply convolution on an image with a 3 (height) by 5
            (width) two-dimensional kernel, specify (3, 5).
        pad (:obj:`tuple` of :obj:`int`): Padding sizes for dimensions.
        stride (:obj:`tuple` of :obj:`int`): Stride sizes for dimensions.
        dilation (:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions.
        group (int): Number of groups of channels. This makes connections
            across channels more sparse by grouping connections along map
            direction.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        base_axis (int): Dimensions up to `base_axis` are treated as the
            sample dimensions.
        fix_parameters (bool): When set to `True`, the weights and biases will
            not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`: N-D array.

    """
    if w_init is None:
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(inp.shape[base_axis], outmaps,
                                    tuple(kernel)), rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", (outmaps, inp.shape[base_axis] // group) + tuple(kernel),
        w_init, not fix_parameters)
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", (outmaps,), b_init, not fix_parameters)
    return F.convolution(inp, w, b, base_axis, pad, stride, dilation, group)
def test_clear_input_if_no_need_grad_convolution(self):
    x1 = nn.Variable([1, 1, 2], need_grad=True)
    x2 = nn.Variable([1, 1, 2], need_grad=True)
    x3 = nn.Variable([1], need_grad=True)

    inp = F.identity(x1)
    weight = F.identity(x2)
    bias = F.identity(x3)
    y = F.convolution(inp, weight, bias)  # (1)

    answer = []
    answer.append([False])
    answer.append([False])
    answer.append([False])
    answer.append([False, False, True])  # (1) clears bias

    y.forward(clear_no_need_grad=True)
    self.check_input_data_clear_called_flags(answer)
def __init__(self, x, weight, bias, beta, gamma, rmean, rvar, z,
             base_axis, pad, stride, dilation, group, channel_last,
             decay_rate, eps, batch_stat, nonlinearity, nonlinearity_args):
    from collections import OrderedDict
    inputs = OrderedDict()

    xvar = nn.Variable.from_numpy_array(x)
    weightvar = nn.Variable.from_numpy_array(weight)
    inputs['x'] = xvar
    inputs['weight'] = weightvar

    biasvar = None
    betavar = None
    gammavar = None
    rmeanvar = None
    rvarvar = None
    zvar = None
    if bias is not None:
        biasvar = nn.Variable.from_numpy_array(bias)
        inputs['bias'] = biasvar
    if beta is not None:
        betavar = nn.Variable.from_numpy_array(beta)
        gammavar = nn.Variable.from_numpy_array(gamma)
        rmeanvar = nn.Variable.from_numpy_array(rmean)
        rvarvar = nn.Variable.from_numpy_array(rvar)
        inputs['beta'] = betavar
        inputs['gamma'] = gammavar
        inputs['rmean'] = rmeanvar
        inputs['rvar'] = rvarvar
    if z is not None:
        zvar = nn.Variable.from_numpy_array(z)
        inputs['z'] = zvar

    h = F.convolution(xvar, weightvar, biasvar, base_axis, pad, stride,
                      dilation, group, channel_last)
    if beta is not None:
        h = F.batch_normalization(
            h, betavar, gammavar, rmeanvar, rvarvar,
            [h.ndim - 1 if channel_last else base_axis],
            decay_rate, eps, batch_stat)
    if z is not None:
        h = F.add2(h, zvar)
    h = ref_activation(h, nonlinearity, nonlinearity_args)

    self.input_dict = inputs
    self.output = h
def nn_data_gauss_down_quad(hr_data, sigma=1.5):
    """
    2D down-scaling by 4 with Gaussian blur
    sigma: the sigma used for Gaussian blur
    return: down-scaled data
    """
    k_w = 1 + 2 * int(sigma * 3.0)
    gau_k = gaussian_2dkernel(k_w, sigma)
    gau_0 = np.zeros_like(gau_k)
    gau_wei = np.float32([
        [gau_k, gau_0, gau_0],
        [gau_0, gau_k, gau_0],
        [gau_0, gau_0, gau_k]])  # only works for RGB images!

    gau_wei = np.transpose(gau_wei, [0, 2, 3, 1])
    gau_wei = nn.Variable.from_numpy_array(gau_wei)
    down_sampled_data = F.convolution(hr_data, weight=gau_wei, stride=(4, 4),
                                      channel_last=True)

    return down_sampled_data
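
# Hedged usage sketch for nn_data_gauss_down_quad() above. The input is
# assumed to be channel-last (N, H, W, 3); gaussian_2dkernel() is an external
# helper not shown here, assumed to return a normalized (k_w, k_w) Gaussian
# kernel as a numpy array.
hr = nn.Variable.from_numpy_array(
    np.random.rand(2, 256, 256, 3).astype(np.float32))
lr = nn_data_gauss_down_quad(hr, sigma=1.5)  # blur, then subsample by 4
lr.forward()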
def downsample_conv_2d(x, w, k=None, factor=2, gain=1):
    assert isinstance(factor, int) and factor >= 1

    # Check weight shape.
    assert w.ndim == 4
    convH = w.shape[2]
    convW = w.shape[3]
    assert convW == convH

    # Setup filter kernel.
    if k is None:
        k = [1] * factor
    k = _setup_kernel(k) * (gain * (factor ** 2))
    p = (k.shape[0] - factor) + (convW - 1)

    # Execute.
    w = w[:, :, ::-1, ::-1]
    x = F.convolution(x, w, stride=(factor, factor))
    return _simple_upfirdn_2d(x, k, pad0=(p + 1) // 2, pad1=p // 2)
def ref_grad_inq_convolution(x, w, i, b, dy, base_axis, pad, stride, dilation,
                             group, num_bits, inq_iterations,
                             selection_algorithm, seed):
    if inq_iterations[-1] == 0:
        # last element in `inq_iterations`, quantize all weights
        i = np.ones_like(i)
    elif 0 in inq_iterations:
        # only `largest_abs` is deterministic
        assert (selection_algorithm == 'largest_abs')
        idx_var = np.flatnonzero(i == 0)
        idx_newfix = idx_var[np.argsort(
            np.abs(w.ravel()[idx_var]))[-(len(idx_var) // 2):]]
        i.ravel()[idx_newfix] = 1

    wq = np.copy(w)
    if np.any(i == 1):
        wq[i == 1] = quantize(w[i == 1], np.max(np.abs(w)), num_bits)

    # Set variables
    vx = nn.Variable(x.shape, need_grad=True)
    vx.d = x
    vx.grad.zero()
    vw = nn.Variable(w.shape, need_grad=True)
    vw.d = wq
    vw.grad.zero()
    vb = None
    if b is not None:
        vb = nn.Variable(b.shape, need_grad=True)
        vb.d = b
        vb.grad.zero()

    # Execute binarized forward and back prop.
    with nn.auto_forward():
        vy = F.convolution(vx, vw, vb, base_axis, pad, stride, dilation, group)
    vy.backward(dy)

    # Return grads
    if b is None:
        return np.concatenate([vx.g.flat, vw.g.flat])
    return np.concatenate([vx.g.flat, vw.g.flat, vb.g.flat])
def anti_alias_interpolate(input, channels, scale):
    # no trainable parameters exist.
    if scale == 1.0:
        # no interpolation executed
        return F.identity(input)

    sigma = (1 / scale - 1) / 2
    kernel_size = 2 * round(sigma * 4) + 1
    ka = kernel_size // 2
    if kernel_size % 2 == 0:
        kb = ka - 1
    else:
        kb = ka

    kernel_size = [kernel_size, kernel_size]
    sigma = [sigma, sigma]
    kernel = 1

    xa = F.reshape(F.arange(0, kernel_size[0]), (-1, 1))
    ya = F.reshape(F.arange(0, kernel_size[1]), (1, -1))
    meshgrids = (F.tile(xa, (1, kernel_size[1])),
                 F.tile(ya, (kernel_size[0], 1)))

    for size, std, mgrid in zip(kernel_size, sigma, meshgrids):
        mean = (size - 1) / 2
        kernel *= F.exp(-(mgrid - mean) ** 2 / (2 * std ** 2))

    kernel = kernel / F.sum(kernel, keepdims=True)
    # Reshape to depthwise convolutional weight
    kernel = F.reshape(kernel, (1, 1) + kernel.shape)
    kernel = F.broadcast(kernel, (channels, 1) + tuple(kernel_size))
    # if using the pre-computed kernel, no need to compute here.

    out = F.pad(input, (ka, kb, ka, kb))
    out = F.convolution(out, weight=kernel, group=channels)
    out = F.interpolate(out, scale=(scale, scale), mode="nearest")

    return out
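
# Hedged usage sketch for anti_alias_interpolate() above (assumes standard
# nnabla imports); values are illustrative. A Gaussian blur is applied per
# channel (group=channels) before nearest-neighbour resizing, so the output
# is (N, C, H * scale, W * scale).
feat = nn.Variable.from_numpy_array(
    np.random.rand(2, 3, 256, 256).astype(np.float32))
small = anti_alias_interpolate(feat, channels=3, scale=0.25)
small.forward()  # small.shape == (2, 3, 64, 64)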
def compute_each_feat_dist(img0, img1, feat_extractor):
    """
    img0, img1 (Variable): shape of (N, 3, H, W). Value ranges should be
                           in [-1., +1.].
    feat_extractor (function): backbone network for getting features.
    """
    img0_feats = feat_extractor(img0)  # lists of Variables.
    img1_feats = feat_extractor(img1)  # each Variable is the activation.

    dists = list()
    for i, (feat0, feat1) in enumerate(zip(img0_feats, img1_feats)):
        feat0 = unit_normalize(feat0)  # normalize.
        feat1 = unit_normalize(feat1)
        # retrieve LPIPS weight.
        lpips_w = nn.parameter.get_parameter_or_create(
            f'lin{i}_model_1_weight', shape=(1, feat0.shape[1], 1, 1))
        # in the paper, it is described as multiplication,
        # but implemented as 1x1 convolution.
        dist = F.convolution(F.pow_scalar((feat0 - feat1), 2), lpips_w)
        dists.append(dist)  # store distance at each layer.

    return dists
def invertible_conv(x, reverse, rng, scope):
    r"""Invertible 1x1 Convolution Layer.

    Args:
        x (nn.Variable): Input variable.
        reverse (bool): Whether it's a reverse direction.
        rng (numpy.random.RandomState): A random generator.
        scope (str): The scope.

    Returns:
        nn.Variable: The output variable.
    """
    batch_size, c, n_groups = x.shape
    with nn.parameter_scope(scope):
        # initialize w by an orthonormal matrix
        w_init = np.linalg.qr(rng.randn(c, c))[0][None, ...]
        W_var = get_parameter_or_create("W", (1, c, c), w_init, True, True)
        W = F.batch_inv(W_var) if reverse else W_var
        x = F.convolution(x, F.reshape(W, (c, c, 1)), None, stride=(1, ))
    if reverse:
        return x
    log_det = batch_size * n_groups * F.log(F.abs(F.batch_det(W)))
    return x, log_det
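
# Hedged round-trip sketch for invertible_conv() above: the reverse direction
# should undo the forward 1x1 convolution up to numerical error. Assumes the
# usual nnabla imports plus `from nnabla.parameter import
# get_parameter_or_create`; shapes and the scope name are illustrative.
rng = np.random.RandomState(0)
x_flow = nn.Variable.from_numpy_array(
    np.random.randn(4, 8, 16).astype(np.float32))
z, log_det = invertible_conv(x_flow, reverse=False, rng=rng, scope='inv_conv')
x_rec = invertible_conv(z, reverse=True, rng=rng, scope='inv_conv')
F.sink(x_rec, log_det).forward()
assert np.allclose(x_flow.d, x_rec.d, atol=1e-3)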
def ref_grad_binary_connect_convolution(x, w, wb, b, dy, base_axis, pad,
                                        stride, dilation, group):
    # Set variables
    vx = nn.Variable(x.shape, need_grad=True)
    vx.d = x
    vx.grad.zero()
    vw = nn.Variable(w.shape, need_grad=True)
    vw.d = binarize(w)
    vw.grad.zero()
    vb = None
    if b is not None:
        vb = nn.Variable(b.shape, need_grad=True)
        vb.d = b
        vb.grad.zero()

    # Execute binarized forward and back prop.
    with nn.auto_forward():
        vy = F.convolution(vx, vw, vb, base_axis, pad, stride, dilation, group)
    vy.backward(dy)

    # Return grads
    if b is None:
        return np.concatenate([vx.g.flat, vw.g.flat])
    return np.concatenate([vx.g.flat, vw.g.flat, vb.g.flat])
def masked_convolution(inp, outmaps, kernel, pad=None, stride=None,
                       dilation=None, group=1, w_init=None, b_init=None,
                       base_axis=1, fix_parameters=False, rng=None,
                       with_bias=True):
    """
    Convolution whose weight and bias are multiplied elementwise by
    non-trainable masks ("Mw", "Mb").
    """
    if w_init is None:
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(inp.shape[base_axis], outmaps,
                                    tuple(kernel)), rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", (outmaps, inp.shape[base_axis] // group) + tuple(kernel),
        w_init, not fix_parameters)
    mask_w = get_parameter_or_create("Mw", w.shape, ConstantInitializer(0.),
                                     False)
    w_masked = w * mask_w
    b = None
    b_masked = None
    if with_bias:
        b = get_parameter_or_create("b", (outmaps, ), b_init,
                                    not fix_parameters)
        mask_b = get_parameter_or_create("Mb", b.shape,
                                         ConstantInitializer(0.), False)
        b_masked = b * mask_b

    return F.convolution(inp, w_masked, b_masked, base_axis, pad, stride,
                         dilation, group)
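
# Hedged usage sketch for masked_convolution() above: the masks "Mw"/"Mb" are
# created filled with zeros and are not trainable, so they are typically set
# explicitly (e.g. to ones, or to a pruning pattern) before running the graph.
# Scope name and sizes are illustrative.
x_m = nn.Variable((1, 3, 8, 8))
with nn.parameter_scope('masked_conv'):
    y_m = masked_convolution(x_m, outmaps=4, kernel=(3, 3), pad=(1, 1))
    nn.parameter.get_parameter('Mw').d.fill(1.0)  # keep all weights
    nn.parameter.get_parameter('Mb').d.fill(1.0)  # keep the bias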
def styled_conv_block(conv_input, w, noise=None, res=4, inmaps=512,
                      outmaps=512, kernel_size=3, pad_size=1, demodulate=True,
                      namescope="Conv", up=False, act=F.leaky_relu):
    """
    Conv block with skip connection for Generator
    """
    batch_size = conv_input.shape[0]

    with nn.parameter_scope(f'G_synthesis/{res}x{res}/{namescope}'):
        W, bias = weight_init_fn(shape=(w.shape[1], inmaps))
        runtime_coef = (1. / np.sqrt(512)).astype(np.float32)
        style = F.affine(w, W * runtime_coef, bias) + 1.0

    runtime_coef_for_conv = (
        1 / np.sqrt(np.prod([inmaps, kernel_size, kernel_size]))).astype(
            np.float32)

    if up:
        init_function = weight_init_fn(
            shape=(inmaps, outmaps, kernel_size, kernel_size),
            return_init=True)
        conv_weight = nn.parameter.get_parameter_or_create(
            name=f'G_synthesis/{res}x{res}/{namescope}/conv/W',
            shape=(inmaps, outmaps, kernel_size, kernel_size),
            initializer=init_function)
    else:
        init_function = weight_init_fn(
            shape=(outmaps, inmaps, kernel_size, kernel_size),
            return_init=True)
        conv_weight = nn.parameter.get_parameter_or_create(
            name=f'G_synthesis/{res}x{res}/{namescope}/conv/W',
            shape=(outmaps, inmaps, kernel_size, kernel_size),
            initializer=init_function)
    conv_weight = F.mul_scalar(conv_weight, runtime_coef_for_conv)

    if up:
        scale = F.reshape(style, (style.shape[0], style.shape[1], 1, 1, 1),
                          inplace=False)
    else:
        scale = F.reshape(style, (style.shape[0], 1, style.shape[1], 1, 1),
                          inplace=False)

    mod_w = F.mul2(
        F.reshape(conv_weight, (1, ) + conv_weight.shape, inplace=False),
        scale)

    if demodulate:
        if up:
            denom_w = F.pow_scalar(
                F.sum(F.pow_scalar(mod_w, 2.), axis=[1, 3, 4],
                      keepdims=True) + 1e-8, 0.5)
        else:
            denom_w = F.pow_scalar(
                F.sum(F.pow_scalar(mod_w, 2.), axis=[2, 3, 4],
                      keepdims=True) + 1e-8, 0.5)
        demod_w = F.div2(mod_w, denom_w)
    else:
        demod_w = mod_w

    conv_input = F.reshape(
        conv_input, (1, -1, conv_input.shape[2], conv_input.shape[3]),
        inplace=False)
    demod_w = F.reshape(
        demod_w, (-1, demod_w.shape[2], demod_w.shape[3], demod_w.shape[4]),
        inplace=False)

    if up:
        k = [1, 3, 3, 1]
        conv_out = upsample_conv_2d(conv_input, demod_w, k, factor=2, gain=1,
                                    group=batch_size)
    else:
        conv_out = F.convolution(conv_input, demod_w,
                                 pad=(pad_size, pad_size), group=batch_size)

    conv_out = F.reshape(
        conv_out, (batch_size, -1, conv_out.shape[2], conv_out.shape[3]),
        inplace=False)

    if noise is not None:
        noise_coeff = nn.parameter.get_parameter_or_create(
            name=f'G_synthesis/{res}x{res}/{namescope}/noise_strength',
            shape=())
        conv_out = F.add2(conv_out,
                          noise * F.reshape(noise_coeff, (1, 1, 1, 1)))

    bias = nn.parameter.get_parameter_or_create(
        name=f'G_synthesis/{res}x{res}/{namescope}/conv/b',
        shape=(outmaps, ),
        initializer=np.random.randn(outmaps, ).astype(np.float32))
    conv_out = F.add2(conv_out,
                      F.reshape(bias, (1, outmaps, 1, 1), inplace=False))

    if act == F.leaky_relu:
        conv_out = F.mul_scalar(
            F.leaky_relu(conv_out, alpha=0.2, inplace=False),
            np.sqrt(2), inplace=False)
    else:
        conv_out = act(conv_out)

    return conv_out
def __call__(self, inp):
    return F.convolution(inp, self.W, self.b, self.base_axis, self.pad,
                         self.stride, self.dilation, self.group)
def quantized_convolution(inp, outmaps, kernel, pad=None, stride=None,
                          dilation=None, group=1, w_init=None, b_init=None,
                          base_axis=1, fix_parameters=False, rng=None,
                          with_bias=True, quantization_w=None,
                          quantization_b=None):
    """Quantized Convolution.

    Quantized Convolution where the input/output relationship is

    .. math::

        y_{n, a, b} = \sum_{m} \sum_{i} \sum_{j}
            Q_w(w_{n, m, i, j}) x_{m, a + i, b + j} + Q_b(b_n),

    where :math:`Q_w(w_{n, m, i, j})` is the weight quantization function
    and :math:`Q_b(b_{n})` is the bias quantization function.

    .. note::

        1) If you would like to share weights between some layers, please
        make sure to share the standard, floating-value weights (`weight`)
        and not the quantized weights (`quantized weight`).

        2) The weights and the quantized weights become synced only after
        :func:`~nnabla._variable.Variable.forward` is called, and not after
        a call to :func:`~nnabla._variable.Variable.backward`. To access the
        parameters of the network, remember to call
        :func:`~nnabla._variable.Variable.forward` once before doing so,
        otherwise the float weights and the quantized weights will not be
        in sync.

        3) CPU and GPU implementations now use float values for
        `quantized weight`, since this function is only for simulation
        purposes.

    Args:
        inp (~nnabla.Variable): N-D array.
        outmaps (int): Number of convolution kernels (which is equal to the
            number of output channels). For example, to apply convolution on
            an input with 16 types of filters, specify 16.
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel size. For
            example, to apply convolution on an image with a 3 (height) by 5
            (width) two-dimensional kernel, specify (3, 5).
        pad (:obj:`tuple` of :obj:`int`): Padding sizes for dimensions.
        stride (:obj:`tuple` of :obj:`int`): Stride sizes for dimensions.
        dilation (:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions.
        group (int): Number of groups of channels. This makes connections
            across channels more sparse by grouping connections along map
            direction.
        w_init (:obj:`nnabla.initializer.BaseInitializer` or :obj:`numpy.ndarray`): Initializer for weight.
        b_init (:obj:`nnabla.initializer.BaseInitializer` or :obj:`numpy.ndarray`): Initializer for bias.
        base_axis (int): Dimensions up to `base_axis` are treated as the
            sample dimensions.
        fix_parameters (bool): When set to `True`, the weights and biases will
            not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.
        quantization_w (function): Quantization function that is applied to
            the weights. Use `None` to not quantize the weights.
        quantization_b (function): Quantization function that is applied to
            the bias. Use `None` to not quantize the bias.

    Returns:
        :class:`~nnabla.Variable`: N-D array.

    """
    if w_init is None:
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(inp.shape[base_axis], outmaps,
                                    tuple(kernel)), rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()

    # Floating-point weight
    w = get_parameter_or_create(
        "W", (outmaps, inp.shape[base_axis] // group) + tuple(kernel),
        w_init, True, not fix_parameters)

    # Quantize weights
    if quantization_w is not None:
        w_q = get_parameter_or_create(
            "W_q", (outmaps, inp.shape[base_axis] // group) + tuple(kernel),
            w_init, False)
        # Link computation graph
        real_w_q = quantization_w(w)
        real_w_q.persistent = True
        w_q.data = real_w_q.data
    else:
        real_w_q = w

    # Bias
    # Floating-point bias
    b = None
    b_q = None
    real_b_q = None
    if with_bias:
        b = get_parameter_or_create(
            "b", (outmaps, ), b_init, True, not fix_parameters)
        if quantization_b is not None:
            b_q = get_parameter_or_create("b_q", (outmaps, ), b_init, False)
            # Link computation graph
            real_b_q = quantization_b(b)
            real_b_q.persistent = True
            b_q.data = real_b_q.data
        else:
            real_b_q = b

    return F.convolution(inp, real_w_q, real_b_q, base_axis, pad, stride,
                         dilation, group)
def reverse(self, x):
    weight_inv = nn.Variable(self.weight.shape)
    weight_inv.d = np.linalg.inv(self.weight.d)
    out = F.convolution(x, weight_inv)
    return out
def backward_impl(self, inputs, outputs, prop_down, accum):
    # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
    # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

    # Args
    with_bias = True if len(inputs) == 4 else False
    base_axis = self.forward_func.info.args["base_axis"]
    pad = self.forward_func.info.args["pad"]
    stride = self.forward_func.info.args["stride"]
    dilation = self.forward_func.info.args["dilation"]
    group = self.forward_func.info.args["group"]
    channel_last = self.forward_func.info.args["channel_last"]

    # Inputs
    x0 = inputs[0].data
    w0 = inputs[1].data
    b0 = inputs[2].data if with_bias else None
    dy = inputs[3].data if with_bias else inputs[2].data
    # Outputs
    dx0 = outputs[0].data
    dw0 = outputs[1].data
    db0 = outputs[2].data if with_bias else None
    # Grads of inputs
    g_x0 = inputs[0].grad
    g_w0 = inputs[1].grad
    g_b0 = inputs[2].grad if with_bias else None
    g_dy = inputs[3].grad if with_bias else inputs[2].grad
    # Grads of outputs
    g_dx0 = outputs[0].grad
    g_dw0 = outputs[1].grad
    g_db0 = outputs[2].grad if with_bias else None

    # Computation
    ## w.r.t. x or w.r.t. w
    if prop_down[0] or prop_down[1]:
        # we can re-use the backward of the forward with different inputs
        inp_x = nn.Variable(x0.shape).apply(
            data=g_dx0, grad=g_x0, need_grad=prop_down[0])
        inp_w = nn.Variable(w0.shape).apply(
            data=g_dw0, grad=g_w0, need_grad=prop_down[1])
        out_y = nn.Variable(dy.shape).apply(grad=dy)
        inputs = [inp_x, inp_w]
        outputs = [out_y]
        if with_bias:
            inp_b = nn.Variable(b0.shape).apply(need_grad=False)
            inputs += [inp_b]
        self.forward_func.backward(inputs, outputs, accum)

    ## w.r.t. b
    if with_bias and prop_down[2] and not accum[2]:
        zeros = F.constant(0, b0.shape)
        if not nn.get_auto_forward():
            zeros.forward()
        g_b0.copy_from(zeros.data)

    ## w.r.t. dy
    if (not with_bias and prop_down[2]) or (with_bias and prop_down[3]):
        accum_dy = accum[3] if with_bias else accum[2]
        g_dy_ = F.convolution(g_dx0, w0, None, base_axis, pad, stride,
                              dilation, group, channel_last) \
            + F.convolution(x0, g_dw0, None, base_axis, pad, stride,
                            dilation, group, channel_last)
        if with_bias:
            if not channel_last:
                g_db0 = F.reshape(g_db0, [
                    1 if i != base_axis else g_db0.shape[0]
                    for i in range(g_dy.ndim)])
            else:
                g_db0 = F.reshape(g_db0, [
                    1 if i != (g_dy.ndim - 1) else g_db0.shape[0]
                    for i in range(g_dy.ndim)])
            g_dy_ += g_db0
        if accum_dy:
            g_dy += g_dy_
        else:
            g_dy.copy_from(g_dy_)
def call(self, input):
    return F.convolution(input, self._W, self._b, self._base_axis, self._pad,
                         self._stride, self._dilation, self._group,
                         self._channel_last)
def conv_block(input, w, noise=None, res=4, outmaps=512, inmaps=512,
               kernel_size=3, pad_size=1, demodulate=True, namescope="Conv",
               up=False, act=F.leaky_relu):
    """
    Single convolution block used in each resolution.
    """
    batch_size = input.shape[0]

    with nn.parameter_scope(f"G_synthesis/{res}x{res}/{namescope}"):
        W, bias = weight_init_fn(shape=(w.shape[1], inmaps))
        runtime_coef = 1. / np.sqrt(512)
        s = F.affine(w, W * runtime_coef, bias) + 1.0

    runtime_coef_for_conv = 1 / \
        np.sqrt(np.prod([inmaps, kernel_size, kernel_size]))

    if up:
        conv_weight = nn.parameter.get_parameter_or_create(
            name=f"G_synthesis/{res}x{res}/{namescope}/conv/W",
            shape=(inmaps, outmaps, kernel_size, kernel_size))
    else:
        conv_weight = nn.parameter.get_parameter_or_create(
            name=f"G_synthesis/{res}x{res}/{namescope}/conv/W",
            shape=(outmaps, inmaps, kernel_size, kernel_size))
    conv_weight = conv_weight * runtime_coef_for_conv

    if up:
        scale = F.reshape(s, (s.shape[0], s.shape[1], 1, 1, 1), inplace=True)
    else:
        scale = F.reshape(s, (s.shape[0], 1, s.shape[1], 1, 1), inplace=True)

    mod_w = F.mul2(
        F.reshape(conv_weight, (1, ) + conv_weight.shape, inplace=True),
        scale)

    if demodulate:
        if up:
            denom_w = F.pow_scalar(
                F.sum(F.pow_scalar(mod_w, 2.), axis=[1, 3, 4],
                      keepdims=True) + 1e-8, 0.5)
        else:
            denom_w = F.pow_scalar(
                F.sum(F.pow_scalar(mod_w, 2.), axis=[2, 3, 4],
                      keepdims=True) + 1e-8, 0.5)
        demod_w = F.div2(mod_w, denom_w)
    else:
        demod_w = mod_w

    input = F.reshape(input, (1, -1, input.shape[2], input.shape[3]),
                      inplace=True)
    demod_w = F.reshape(
        demod_w, (-1, demod_w.shape[2], demod_w.shape[3], demod_w.shape[4]),
        inplace=True)

    if up:
        k = [1, 3, 3, 1]
        conv_out = upsample_conv_2d(input, demod_w, k, factor=2, gain=1,
                                    group=batch_size)
    else:
        conv_out = F.convolution(input, demod_w, pad=(pad_size, pad_size),
                                 group=batch_size)

    conv_out = F.reshape(
        conv_out, (batch_size, -1, conv_out.shape[2], conv_out.shape[3]),
        inplace=True)

    if noise is not None:
        noise_coeff = nn.parameter.get_parameter_or_create(
            name=f"G_synthesis/{res}x{res}/{namescope}/noise_strength",
            shape=())
        output = conv_out + noise * \
            F.reshape(noise_coeff, (1, 1, 1, 1), inplace=False)
    else:
        output = conv_out

    bias = nn.parameter.get_parameter_or_create(
        name=f"G_synthesis/{res}x{res}/{namescope}/conv/b",
        shape=(outmaps, ))
    output = output + F.reshape(bias, (1, outmaps, 1, 1), inplace=False)

    if act == F.leaky_relu:
        output = F.leaky_relu(output, alpha=0.2) * np.sqrt(2)
    else:
        output = act(output)

    return output