def INByBatchNorm(inp, axes=[1], decay_rate=0.9, eps=1e-5, fix_parameters=True):
    """Instance Normalization (implemented using BatchNormalization)

    Instance normalization is equivalent to batch normalization when the batch
    size is one; in other words, it normalizes over the spatial dimension(s),
    i.e. all dimensions except the batch and feature dimensions.
    """
    assert len(axes) == 1
    shape_stat = [1 for _ in inp.shape]
    shape_stat[axes[0]] = inp.shape[axes[0]]
    beta = get_parameter_or_create(
        "beta", shape_stat, ConstantInitializer(0), not fix_parameters)
    gamma = get_parameter_or_create(
        "gamma", shape_stat, ConstantInitializer(1), not fix_parameters)
    mean = get_parameter_or_create(
        "mean", shape_stat, ConstantInitializer(0), False)
    var = get_parameter_or_create(
        "var", shape_stat, ConstantInitializer(0), False)
    return F.batch_normalization(inp, beta, gamma, mean, var, axes,
                                 decay_rate, eps, batch_stat=True,
                                 output_stat=False)
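# Usage sketch (an illustration, not part of the original source): it assumes
# the usual nnabla imports and that INByBatchNorm is in scope. With a batch
# size of 1, batch statistics are computed per feature map over the spatial
# dimensions, which is exactly instance normalization.
import numpy as np
import nnabla as nn

x = nn.Variable((1, 16, 32, 32))      # batch size must be 1 for this trick
x.d = np.random.randn(*x.shape)
with nn.parameter_scope("in0"):       # "in0" is an arbitrary scope name
    y = INByBatchNorm(x)              # each of the 16 maps is normalized over 32x32
y.forward()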
def __call__(self, x):
    if not isinstance(x, nn._variable.Variable):
        input_variable = nn.Variable(x.shape)
        if isinstance(x, np.ndarray):
            input_variable.d = x
        else:
            input_variable.data = x
    else:
        input_variable = x
    features = self.backbone_model(input_variable, test=not self.training,
                                   channel_last=self.channel_last)
    output = []
    for head in sorted(self.heads):
        num_output = self.heads[head]
        if self.head_conv > 0:
            with nn.parameter_scope(head + "_conv1"):
                b_init_param = -2.19 if head == 'hm' else 0.0
                w_init_param = torch_initializer(
                    features.shape[self.axes], (3, 3)) if head == 'hm' else self.n_init
                out = pf_convolution(
                    features, self.head_conv, (3, 3), pad=(1, 1), stride=(1, 1),
                    w_init=w_init_param,
                    b_init=ConstantInitializer(b_init_param),
                    with_bias=True, channel_last=self.channel_last)
                out = F.relu(out, inplace=True)
            with nn.parameter_scope(head + "_final"):
                w_init_param = torch_initializer(
                    features.shape[self.axes], (1, 1)) if head == 'hm' else self.n_init
                out = pf_convolution(
                    out, num_output, (1, 1), pad=(0, 0), stride=(1, 1),
                    w_init=w_init_param,
                    b_init=ConstantInitializer(b_init_param),
                    with_bias=True, channel_last=self.channel_last)
        else:
            with nn.parameter_scope(head + "_final"):
                w_init_param = torch_initializer(
                    features.shape[self.axes], (1, 1)) if head == 'hm' else self.n_init
                out = pf_convolution(
                    features, num_output, (1, 1), pad=(0, 0), stride=(1, 1),
                    w_init=w_init_param, with_bias=True,
                    channel_last=self.channel_last)
        output.append(out)
    return output
def parametric_fixed_point_quantize_b_xmax(x, sign=True,
                                           n_init=8, n_min=2, n_max=16,
                                           xmax_init=1, xmax_min=0.001, xmax_max=10,
                                           fix_parameters=False):
    """Parametric version of `fixed_point_quantize` where the bitwidth `b` and
    dynamic range `xmax` are learnable parameters.

    Returns:
        ~nnabla.Variable: N-D array.
    """
    def clip_scalar(v, min_value, max_value):
        return F.minimum_scalar(F.maximum_scalar(v, min_value), max_value)

    def broadcast_scalar(v, shape):
        return F.broadcast(F.reshape(v, (1,) * len(shape), inplace=False), shape=shape)

    def quantize_pow2(v):
        return 2 ** F.round(F.log(v) / np.log(2.))

    n = get_parameter_or_create("n", (),
                                ConstantInitializer(n_init),
                                need_grad=True,
                                as_need_grad=not fix_parameters)
    xmax = get_parameter_or_create("xmax", (),
                                   ConstantInitializer(xmax_init),
                                   need_grad=True,
                                   as_need_grad=not fix_parameters)

    # ensure that bitwidth is in the specified range and an integer
    n = F.round(clip_scalar(n, n_min, n_max))
    if sign:
        n = n - 1

    # ensure that dynamic range is in the specified range
    xmax = clip_scalar(xmax, xmax_min, xmax_max)

    # compute step size from dynamic range and make sure that it is a pow2
    d = quantize_pow2(xmax / (2 ** n - 1))

    # compute min/max value that we can represent
    if sign:
        xmin = -xmax
    else:
        xmin = nn.Variable((1, ), need_grad=False)
        xmin.d = 0.

    # broadcast variables to correct size
    d = broadcast_scalar(d, shape=x.shape)
    xmin = broadcast_scalar(xmin, shape=x.shape)
    xmax = broadcast_scalar(xmax, shape=x.shape)

    # apply fixed-point quantization
    return d * F.round(F.clip_by_value(x, xmin, xmax) / d)
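# Usage sketch (illustrative only; the scope name and shapes are assumptions):
# the learnable scalars "n" (bitwidth) and "xmax" (dynamic range) are created
# under the enclosing parameter scope, so each quantizer should get its own scope.
import numpy as np
import nnabla as nn

x = nn.Variable((4, 64))
x.d = np.random.randn(4, 64)
with nn.parameter_scope("q0"):
    y = parametric_fixed_point_quantize_b_xmax(x, sign=True, n_init=8, xmax_init=1.0)
y.forward()                           # values are snapped to the learned fixed-point grid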
def inq_convolution(inp, outmaps, kernel,
                    pad=None, stride=None, dilation=None, group=1,
                    num_bits=4, inq_iterations=(), selection_algorithm='random',
                    seed=-1, w_init=None, i_init=None, b_init=None,
                    base_axis=1, fix_parameters=False, rng=None, with_bias=True):
    """Incremental Network Quantization Convolution Layer

    During training, the weights are sequentially quantized to power-of-two
    values, which allows the training of a multiplier-less network.

    Using `inq_iterations`, one can specify after how many forward passes half
    of the learnable weights are fixed and quantized to powers of two. After
    reaching the last value in `inq_iterations`, all weights are fixed.

    For more details, please refer to the reference.

    Reference:
        Zhou A, Yao A, Guo Y, Xu L, Chen Y. Incremental network quantization:
        Towards lossless CNNs with low-precision weights.
        <https://arxiv.org/abs/1702.03044>

    Args:
        inp (~nnabla.Variable): Input N-D array with shape
            (:math:`M_0 \\times \\ldots \\times M_{B-1} \\times D_B \\times \\ldots \\times D_N`).
        outmaps (int): Number of convolution kernels (which is equal to the
            number of output channels).
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel size.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        num_bits (int): Number of bits per weight. The value has to be larger
            than 1, as one bit is already used to code the value "0".
        inq_iterations (tuple of int): Tuple of iteration numbers at which we
            fix half of the weights.
        selection_algorithm (str): Chooses the algorithm used to decide which
            weights are fixed ("largest_abs" ... fix weights with largest
            absolute value, "random" ... fix weights randomly).
        seed (int): Random seed for the INQ algorithm.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for the weight.
        i_init (~nnabla.initializer.BaseInitializer): Initializer for the
            indicators (0 ... learnable, 1 ... fixed).
        b_init (~nnabla.initializer.BaseInitializer): Initializer for the bias.
        fix_parameters (bool): When set to `True`, the weight and bias will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`
    """
    if w_init is None:
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(inp.shape[base_axis], outmaps, tuple(kernel)),
            rng=rng)
    if i_init is None:
        i_init = ConstantInitializer()
    if b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", (outmaps, inp.shape[base_axis]) + tuple(kernel),
        w_init, not fix_parameters)
    i = get_parameter_or_create(
        "I", (outmaps, inp.shape[base_axis]) + tuple(kernel),
        i_init, False)
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", (outmaps,), b_init, not fix_parameters)
    return F.inq_convolution(inp, w, i, b, base_axis, pad, stride, dilation,
                             group, num_bits, inq_iterations,
                             selection_algorithm, seed)
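# Usage sketch (illustrative only; scope name, shapes and iteration numbers are
# assumptions): half of the remaining learnable weights are fixed at each
# iteration listed in inq_iterations, and all weights are fixed after the last one.
import nnabla as nn
import nnabla.functions as F

x = nn.Variable((8, 3, 32, 32))       # NCHW input, batch of 8
with nn.parameter_scope("inq_conv1"):
    h = inq_convolution(x, outmaps=16, kernel=(3, 3), pad=(1, 1),
                        num_bits=4, inq_iterations=(1000, 2000, 3000),
                        selection_algorithm='largest_abs')
h = F.relu(h)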
def __call__(self, x, axes=[1], training=True, name=''):
    shape = [1] * x.ndim
    m = nn.parameter.get_parameter_or_create(
        'm-{}'.format(name), shape, ConstantInitializer())
    M = nn.parameter.get_parameter_or_create(
        'M-{}'.format(name), shape, ConstantInitializer())
    y = self._function(x, m, M, training=training)
    return y
def CCBN(h, y, n_classes, decay_rate=0.999, test=False, fix_parameters=False, coefs=[1.0]):
    """Categorical Conditional Batch Normalization"""
    # Call the batch normalization once
    shape_stat = [1 for _ in h.shape]
    shape_stat[1] = h.shape[1]

    gamma_tmp = nn.Variable.from_numpy_array(np.ones(shape_stat))
    beta_tmp = nn.Variable.from_numpy_array(np.zeros(shape_stat))
    mean = get_parameter_or_create(
        "mean", shape_stat, ConstantInitializer(0.0), False)
    var = get_parameter_or_create(
        "var", shape_stat, ConstantInitializer(1.0), False)
    h = F.batch_normalization(h, beta_tmp, gamma_tmp, mean, var,
                              decay_rate=decay_rate, batch_stat=not test)

    # Condition the gamma and beta with the class label
    b, c = h.shape[0:2]

    def embed_func(y, initializer):
        if type(y) != list:
            o = embed(y, n_classes, c, initializer=initializer, sn=False, test=test)
        else:
            y_list = y
            o = reduce(lambda x, y: x + y,
                       [coef * embed(y, n_classes, c, initializer=initializer,
                                     sn=False, test=test)
                        for coef, y in zip(coefs, y_list)])
        return o

    with nn.parameter_scope("gamma"):
        gamma = embed_func(y, ConstantInitializer(1.0))
        gamma = F.reshape(gamma, [b, c] + [1 for _ in range(len(h.shape[2:]))])
        gamma = F.broadcast(gamma, h.shape)
    with nn.parameter_scope("beta"):
        beta = embed_func(y, ConstantInitializer(0.0))
        beta = F.reshape(beta, [b, c] + [1 for _ in range(len(h.shape[2:]))])
        beta = F.broadcast(beta, h.shape)

    return gamma * h + beta
def LN(inp, fix_parameters=False):
    """Layer normalization.
    """
    beta_shape = (1, inp.shape[1], 1, 1)
    gamma_shape = (1, inp.shape[1], 1, 1)
    beta = get_parameter_or_create(
        "beta", beta_shape, ConstantInitializer(0), not fix_parameters)
    gamma = get_parameter_or_create(
        "gamma", gamma_shape, ConstantInitializer(1), not fix_parameters)
    return f_layer_normalization(inp, beta, gamma)
def __init__(self, n_features, n_dims, axes=[1], decay_rate=0.9, eps=1e-5,
             output_stat=False, fix_parameters=False, param_init=None, name=''):
    Module.__init__(self, name=name)
    self._scope_name = f'<batchnorm at {hex(id(self))}>'

    assert len(axes) == 1

    shape_stat = [1 for _ in range(n_dims)]
    shape_stat[axes[0]] = n_features

    if param_init is None:
        param_init = {}
    beta_init = param_init.get('beta', ConstantInitializer(0))
    gamma_init = param_init.get('gamma', ConstantInitializer(1))
    mean_init = param_init.get('mean', ConstantInitializer(0))
    var_init = param_init.get('var', ConstantInitializer(1))

    if fix_parameters:
        self._beta = nn.Variable.from_numpy_array(beta_init(shape_stat))
        self._gamma = nn.Variable.from_numpy_array(gamma_init(shape_stat))
    else:
        self._beta = Parameter(shape_stat, initializer=beta_init,
                               scope=self._scope_name)
        self._gamma = Parameter(shape_stat, initializer=gamma_init,
                                scope=self._scope_name)
    self._mean = Parameter(shape_stat, need_grad=False,
                           initializer=mean_init, scope=self._scope_name)
    self._var = Parameter(shape_stat, need_grad=False,
                          initializer=var_init, scope=self._scope_name)

    self._axes = axes
    self._decay_rate = decay_rate
    self._eps = eps
    self._n_features = n_features
    self._fix_parameters = fix_parameters
    self._output_stat = output_stat
def batch_normalization(inp, axes=[1], decay_rate=0.9, eps=1e-5,
                        batch_stat=True, output_stat=False):
    """
    Batch normalization layer.

    .. math::

        \\begin{array}{lcl}
        \\mu &=& \\frac{1}{M} \\sum x_i\\\\
        \\sigma^2 &=& \\frac{1}{M} \\sum \\left(x_i - \\mu\\right)^2\\\\
        \\hat{x}_i &=& \\frac{x_i - \\mu}{\\sqrt{\\sigma^2 + \\epsilon}}\\\\
        y_i &=& \\hat{x}_i \\gamma + \\beta.
        \\end{array}

    where :math:`x_i` and :math:`y_i` are the input and output, respectively.
    At test time, the mean and variance computed by moving average during
    training are used.

    Args:
        inp (~nnabla.Variable): N-D array of input.
        axes (:obj:`tuple` of :obj:`int`): Axes over which mean and variance are taken.
        decay_rate (float): Decay rate of running mean and variance.
        eps (float): Tiny value to avoid division by zero in std.
        batch_stat (bool): Use mini-batch statistics rather than running ones.
        output_stat (bool): Output batch mean and variance.

    Returns:
        :class:`~nnabla.Variable`: N-D array.

    References:

        - Ioffe and Szegedy, Batch Normalization: Accelerating Deep Network
          Training by Reducing Internal Covariate Shift.
          https://arxiv.org/abs/1502.03167

    """
    assert len(axes) == 1
    shape_stat = [1 for _ in inp.shape]
    shape_stat[axes[0]] = inp.shape[axes[0]]
    beta = get_parameter_or_create(
        "beta", shape_stat, ConstantInitializer(0), True)
    gamma = get_parameter_or_create(
        "gamma", shape_stat, ConstantInitializer(1), True)
    mean = get_parameter_or_create(
        "mean", shape_stat, ConstantInitializer(0), False)
    var = get_parameter_or_create(
        "var", shape_stat, ConstantInitializer(0), False)
    return F.batch_normalization(inp, beta, gamma, mean, var, axes,
                                 decay_rate, eps, batch_stat, output_stat)
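# Usage sketch (illustrative only; scope name and shapes are assumptions):
# beta, gamma, mean and var are created under the enclosing parameter scope,
# so wrapping each call in nn.parameter_scope keeps layers separate.
import numpy as np
import nnabla as nn

x = nn.Variable((8, 32, 28, 28))
x.d = np.random.randn(*x.shape)
with nn.parameter_scope("bn1"):
    y = batch_normalization(x, batch_stat=True)   # training-time statistics
y.forward()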
def mlp_gradient_synthesizer(x, y=None, test=False):
    maps = x.shape[1]
    if y is not None:
        h = F.one_hot(y, (10, ))
        h = F.concatenate(*[x, h], axis=1)
    else:
        h = x

    with nn.parameter_scope("gs"):
        h = act_bn_linear(h, maps, test, name="fc0")
        h = act_bn_linear(h, maps, test, name="fc1")
        w_init = ConstantInitializer(0)
        b_init = ConstantInitializer(0)
        g_pred = PF.affine(h, maps, w_init=w_init, b_init=b_init, name="fc")
        g_pred.persistent = True
    return g_pred
def __init__(self, n_inmaps, n_outmaps, base_axis=1, w_init=None, b_init=None,
             fix_parameters=False, rng=None, with_bias=True):
    if not hasattr(n_outmaps, '__iter__'):
        n_outmaps = [n_outmaps]
    n_outmaps = list(n_outmaps)
    n_outmap = int(np.prod(n_outmaps))
    if w_init is None:
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(n_inmaps, n_outmap), rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w_shape = (n_inmaps, n_outmap)
    w = nn.Variable.from_numpy_array(
        w_init(w_shape)).apply(need_grad=not fix_parameters)
    b = None
    if with_bias:
        b_shape = (n_outmap, )
        b = nn.Variable.from_numpy_array(
            b_init(b_shape)).apply(need_grad=not fix_parameters)
    self.W = w
    self.b = b
    self.base_axis = base_axis
def convolution(inp, outmaps, kernel,
                pad=None, stride=None, dilation=None, group=1, itr=1,
                w_init=None, b_init=None,
                base_axis=1, fix_parameters=False, rng=None, with_bias=True,
                sn=True, test=False, init_scale=1.0):
    """Convolution with spectral normalization applied to the weights."""
    if w_init is None:
        l, u = calc_uniform_lim_glorot(inp.shape[base_axis], outmaps, tuple(kernel))
        l, u = init_scale * l, init_scale * u
        w_init = UniformInitializer((l, u), rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", (outmaps, inp.shape[base_axis] // group) + tuple(kernel),
        w_init, not fix_parameters)
    w_sn = spectral_normalization_for_conv(w, itr=itr, test=test) if sn else w
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", (outmaps, ), b_init, not fix_parameters)
    return F.convolution(inp, w_sn, b, base_axis, pad, stride, dilation, group)
def __init__(self, in_features, out_features, base_axis=1,
             w_init=None, b_init=None, rng=None, bias=True, name=''):
    Module.__init__(self, name=name)
    self._scope_name = f'<linear at {hex(id(self))}>'

    if w_init is None:
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(in_features, out_features), rng=rng)
    self._W = Parameter((in_features, out_features),
                        initializer=w_init, scope=self._scope_name)
    self._b = None

    if bias:
        if b_init is None:
            b_init = ConstantInitializer()
        self._b = Parameter((out_features, ),
                            initializer=b_init, scope=self._scope_name)

    self._base_axis = base_axis
    self._in_features = in_features
    self._out_features = out_features
def __init__(self, inmaps, outmaps, kernel,
             pad=None, stride=None, dilation=None, group=1,
             w_init=None, b_init=None, base_axis=1,
             fix_parameters=False, rng=None, with_bias=True):
    if w_init is None:
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(inmaps, outmaps, tuple(kernel)), rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w_shape = (outmaps, inmaps // group) + tuple(kernel)
    w = nn.Variable.from_numpy_array(
        w_init(w_shape)).apply(need_grad=not fix_parameters)
    b = None
    if with_bias:
        b_shape = (outmaps, )
        b = nn.Variable.from_numpy_array(
            b_init(b_shape)).apply(need_grad=not fix_parameters)
    self.W = w
    self.b = b
    self.base_axis = base_axis
    self.pad = pad
    self.stride = stride
    self.dilation = dilation
    self.group = group
def attnblock(h, r=8, fix_parameters=False, sn=True, test=False):
    """Attention block"""
    x = h

    # 1x1 convolutions
    b, c, s0, s1 = h.shape
    c_r = c // r
    assert c_r > 0
    f_x = convolution(h, c_r, kernel=(1, 1), pad=(0, 0), stride=(1, 1),
                      name="f", with_bias=False, sn=sn, test=test)
    g_x = convolution(h, c_r, kernel=(1, 1), pad=(0, 0), stride=(1, 1),
                      name="g", with_bias=False, sn=sn, test=test)
    h_x = convolution(h, c, kernel=(1, 1), pad=(0, 0), stride=(1, 1),
                      name="h", with_bias=False, sn=sn, test=test)

    # Attend
    attn = F.batch_matmul(f_x.reshape([b, c_r, -1]),
                          g_x.reshape([b, c_r, -1]), transpose_a=True)
    attn = F.softmax(attn, 1)
    h_x = h_x.reshape([b, c, -1])
    o = F.batch_matmul(h_x, attn)
    o = F.reshape(o, [b, c, s0, s1])

    # Shortcut
    gamma = get_parameter_or_create(
        "gamma", [1, 1, 1, 1], ConstantInitializer(0.), not fix_parameters)
    y = gamma * o + x
    return y
def BN(inp, axes=[1], decay_rate=0.9, eps=1e-5,
       batch_stat=True, output_stat=False, fix_parameters=False):
    """Batch Normalization
    """
    shape_stat = [1 for _ in inp.shape]
    shape_stat[axes[0]] = inp.shape[axes[0]]
    beta = get_parameter_or_create(
        "beta", shape_stat, ConstantInitializer(0), not fix_parameters)
    gamma = get_parameter_or_create(
        "gamma", shape_stat, ConstantInitializer(1), not fix_parameters)
    mean = get_parameter_or_create(
        "mean", shape_stat, ConstantInitializer(0), False)
    var = get_parameter_or_create(
        "var", shape_stat, ConstantInitializer(0), False)
    return F.batch_normalization(inp, beta, gamma, mean, var, axes,
                                 decay_rate, eps, batch_stat, output_stat)
def affine(inp, n_outmaps, base_axis=1, w_init=None, b_init=None, itr=1,
           fix_parameters=False, rng=None, with_bias=True, sn=True, test=False):
    """Affine layer with spectral normalization applied to the weights."""
    if not hasattr(n_outmaps, '__iter__'):
        n_outmaps = [n_outmaps]
    n_outmaps = list(n_outmaps)
    n_outmap = int(np.prod(n_outmaps))
    if w_init is None:
        inmaps = np.prod(inp.shape[base_axis:])
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(inmaps, n_outmap), rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps,
        w_init, not fix_parameters)
    w_sn = spectral_normalization_for_affine(w, itr=itr, test=test) if sn else w
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", n_outmaps, b_init, not fix_parameters)
    return F.affine(inp, w_sn, b, base_axis)
def modify(self, f, inputs):
    fname = f.info.type_name
    if not fname in self._fct_set:
        return

    # Return unless the previous or the next function is BatchNormalization
    next_func = f.outputs[0].function_references[0]
    prev_func = f.inputs[0].parent
    if (prev_func == None or prev_func.info.type_name != 'BatchNormalization') \
            and next_func.info.type_name != 'BatchNormalization':
        return

    x = inputs[0]
    w = inputs[1]
    b = inputs[2] if len(inputs) == 3 else None
    if b is not None:
        return
    scope = self.get_parameter_scope(w)
    n_outmaps = w.shape[1] if fname == 'Affine' else w.shape[0]
    with nn.parameter_scope(scope):
        b = get_parameter_or_create('b', (n_outmaps, ),
                                    ConstantInitializer(), True, True)
    h = self.connect(f, x, w, b)
    return h
def conv(inp, outmaps, kernel,
         pad=None, stride=None, dilation=None, group=1,
         w_init=None, b_init=None,
         base_axis=1, fix_parameters=False, rng=None, with_bias=True,
         use_wscale=True, use_he_backward=False):
    """Convolution with optional weight scaling (equalized learning rate)."""
    # Use He backward
    if use_he_backward:
        std = calc_normal_std_he_backward(
            inp.shape[base_axis], outmaps, kernel=kernel)
    else:
        std = calc_normal_std_he_forward(
            inp.shape[base_axis], outmaps, kernel=kernel)

    # W init
    if w_init is None and use_wscale:
        # Equalized Learning Rate
        w_init = NormalInitializer(1.)
        w = get_parameter_or_create(
            "W", (outmaps, inp.shape[base_axis] // group) + tuple(kernel),
            w_init, not fix_parameters)
        w *= std
    elif w_init is None and not use_wscale:
        w_init = NormalInitializer(std)
        w = get_parameter_or_create(
            "W", (outmaps, inp.shape[base_axis] // group) + tuple(kernel),
            w_init, not fix_parameters)
    else:
        if w_init is None:
            w_init = UniformInitializer(
                calc_uniform_lim_glorot(inp.shape[base_axis], outmaps, tuple(kernel)),
                rng=rng)
        w = get_parameter_or_create(
            "W", (outmaps, inp.shape[base_axis] // group) + tuple(kernel),
            w_init, not fix_parameters)

    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", (outmaps, ), b_init, not fix_parameters)

    return F.convolution(inp, w, b, base_axis, pad, stride, dilation, group)
def __init__(self, in_channels, out_channels, kernel,
             pad=None, stride=None, dilation=None, group=1,
             w_init=None, b_init=None, base_axis=1,
             fix_parameters=False, rng=None, with_bias=True,
             channel_last=False, name=''):
    Module.__init__(self, name=name)
    self._scope_name = f'<convolution at {hex(id(self))}>'

    if w_init is None:
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(in_channels, out_channels, tuple(kernel)),
            rng=rng)

    w_shape = (out_channels, in_channels // group) + tuple(kernel)
    b_shape = (out_channels, )

    self._b = None
    if with_bias and b_init is None:
        b_init = ConstantInitializer()

    if fix_parameters:
        self._W = nn.Variable.from_numpy_array(w_init(w_shape))
        if with_bias:
            self._b = nn.Variable.from_numpy_array(b_init(b_shape))
    else:
        self._W = Parameter(w_shape, initializer=w_init, scope=self._scope_name)
        if with_bias:
            self._b = Parameter(b_shape, initializer=b_init, scope=self._scope_name)

    self._base_axis = base_axis
    self._pad = pad
    self._stride = stride
    self._dilation = dilation
    self._group = group
    self._kernel = kernel
    self._in_channels = in_channels
    self._out_channels = out_channels
    self._channel_last = channel_last
    self._fix_parameters = fix_parameters
    self._rng = rng
def _init_beta_gamma(shape, fix_parameters, param_init, no_bias, no_scale):
    from nnabla.parameter import get_parameter_or_create
    from nnabla.initializer import ConstantInitializer

    if no_bias:
        beta = None
    else:
        beta_init = param_init.get('beta', ConstantInitializer(0))
        beta = get_parameter_or_create(
            "beta", shape, beta_init, True, not fix_parameters)

    if no_scale:
        gamma = None
    else:
        gamma_init = param_init.get('gamma', ConstantInitializer(1))
        gamma = get_parameter_or_create(
            "gamma", shape, gamma_init, True, not fix_parameters)

    return beta, gamma
def _get_generator(proto):
    if proto.type == 'Normal':
        return NormalInitializer(sigma=proto.multiplier)
    elif proto.type == 'Uniform':
        return UniformInitializer(lim=(-proto.multiplier, proto.multiplier))
    elif proto.type == 'Constant':
        return ConstantInitializer(value=proto.multiplier)
    else:
        raise ValueError('Generator type "' +
                         proto.type + '" is not supported.')
def _create_variable(v, name, shape):
    # Create and initialize variables
    class Variable:
        pass

    parameter = v.type == "Parameter"
    variable_instance = None

    if parameter:
        if v.initializer.type == 'Normal':
            initializer = NormalInitializer(v.initializer.multiplier)
        elif v.initializer.type == 'NormalAffineHe' or v.initializer.type == 'NormalAffineHeForward':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_forward(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalAffineHeBackward':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_backward(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalAffineGlorot':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_glorot(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalConvolutionHe' or v.initializer.type == 'NormalConvolutionHeForward':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_forward(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalConvolutionHeBackward':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_backward(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalConvolutionGlorot':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_glorot(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'Uniform':
            initializer = UniformInitializer(
                lim=[-v.initializer.multiplier, v.initializer.multiplier])
        elif v.initializer.type == 'UniformAffineGlorot':
            initializer = (lambda shape: UniformInitializer(calc_uniform_lim_glorot(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'UniformConvolutionGlorot':
            initializer = (lambda shape: UniformInitializer(calc_uniform_lim_glorot(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'Constant':
            initializer = ConstantInitializer(value=v.initializer.multiplier)
        else:
            initializer = None
        variable_instance = get_parameter_or_create(name, shape, initializer)
    else:
        # Create an empty variable; memory will be allocated in network.setup()
        # after network optimization.
        variable_instance = nn.Variable()

    variable = Variable()
    variable.name = name
    variable.parameter = parameter
    variable.shape = shape
    variable.variable_instance = variable_instance

    return variable
def convolution(inp, outmaps, kernel,
                pad=None, stride=None, dilation=None, group=1,
                w_init=None, b_init=None,
                base_axis=1, fix_parameters=False, rng=None, with_bias=True):
    """
    N-D Convolution with a bias term.

    For Dilated Convolution (a.k.a. Atrous Convolution), refer to:

    - Chen et al., DeepLab: Semantic Image Segmentation with Deep Convolutional
      Nets, Atrous Convolution, and Fully Connected CRFs.
      https://arxiv.org/abs/1606.00915

    - Yu et al., Multi-Scale Context Aggregation by Dilated Convolutions.
      https://arxiv.org/abs/1511.07122

    Args:
        inp (~nnabla.Variable): N-D array.
        outmaps (int): Number of convolution kernels (which is equal to the
            number of output channels). For example, to apply convolution on an
            input with 16 types of filters, specify 16.
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel size. For
            example, to apply convolution on an image with a 3 (height) by 5
            (width) two-dimensional kernel, specify (3, 5).
        pad (:obj:`tuple` of :obj:`int`): Padding sizes for dimensions.
        stride (:obj:`tuple` of :obj:`int`): Stride sizes for dimensions.
        dilation (:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions.
        group (int): Number of groups of channels. This makes connections
            across channels more sparse by grouping connections along the map
            direction.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        base_axis (int): Dimensions up to `base_axis` are treated as the
            sample dimensions.
        fix_parameters (bool): When set to `True`, the weights and biases will
            not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`: N-D array.

    """
    if w_init is None:
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(inp.shape[base_axis], outmaps, tuple(kernel)),
            rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", (outmaps, inp.shape[base_axis] // group) + tuple(kernel),
        w_init, not fix_parameters)
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", (outmaps, ), b_init, not fix_parameters)
    return F.convolution(inp, w, b, base_axis, pad, stride, dilation, group)
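# Usage sketch (illustrative only; scope name and shapes are assumptions):
# a 3x3 convolution with pad=(1, 1) keeps the spatial size, and `dilation`
# gives the atrous convolution referenced in the docstring.
import nnabla as nn
import nnabla.functions as F

x = nn.Variable((4, 3, 64, 64))
with nn.parameter_scope("conv1"):
    h = convolution(x, outmaps=32, kernel=(3, 3), pad=(1, 1))
h = F.relu(h)
print(h.shape)                        # (4, 32, 64, 64)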
def affine(inp, n_outmaps, base_axis=1, w_init=None, b_init=None,
           fix_parameters=False, rng=None, with_bias=True,
           use_wscale=True, use_he_backward=False):
    """Affine layer with optional weight scaling (equalized learning rate)."""
    if not hasattr(n_outmaps, '__iter__'):
        n_outmaps = [n_outmaps]
    n_outmaps = list(n_outmaps)
    n_outmap = int(np.prod(n_outmaps))

    # Use He backward
    if use_he_backward:
        std = calc_normal_std_he_backward(inp.shape[base_axis], n_outmap)
    else:
        std = calc_normal_std_he_forward(inp.shape[base_axis], n_outmap)

    # W init
    if w_init is None and use_wscale:
        # Equalized Learning Rate
        w_init = NormalInitializer(1.)
        w = get_parameter_or_create(
            "W", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps,
            w_init, not fix_parameters)
        w *= std
    elif w_init is None and not use_wscale:
        w_init = NormalInitializer(std)
        w = get_parameter_or_create(
            "W", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps,
            w_init, not fix_parameters)
    else:
        if w_init is None:
            w_init = UniformInitializer(
                calc_uniform_lim_glorot(inp.shape[base_axis], n_outmaps), rng=rng)
        w = get_parameter_or_create(
            "W", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps,
            w_init, not fix_parameters)

    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", n_outmaps, b_init, not fix_parameters)

    return F.affine(inp, w, b, base_axis)
def noisy_layer(x, out_size, name):
    inpt_size = x.shape[1]
    root_p = np.sqrt(inpt_size)
    mu_init = UniformInitializer((-1.0 / root_p, 1.0 / root_p))
    sig_init = ConstantInitializer(0.5 / root_p)
    eps_w, eps_b = sample_noise(inpt_size, out_size)
    with nn.parameter_scope(name):
        mu_w = get_parameter_or_create('mu_w', (inpt_size, out_size), mu_init)
        sig_w = get_parameter_or_create('sig_w', (inpt_size, out_size), sig_init)
        mu_b = get_parameter_or_create('mu_b', (out_size, ), mu_init)
        sig_b = get_parameter_or_create('sig_b', (out_size, ), sig_init)
        return F.affine(x, mu_w + sig_w * eps_w, mu_b + sig_b * eps_b)
def masked_convolution(inp, outmaps, kernel,
                       pad=None, stride=None, dilation=None, group=1,
                       w_init=None, b_init=None,
                       base_axis=1, fix_parameters=False, rng=None, with_bias=True):
    """Convolution whose weights (and bias) are multiplied by element-wise masks."""
    if w_init is None:
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(inp.shape[base_axis], outmaps, tuple(kernel)),
            rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", (outmaps, inp.shape[base_axis] // group) + tuple(kernel),
        w_init, not fix_parameters)
    mask_w = get_parameter_or_create(
        "Mw", w.shape, ConstantInitializer(0.), False)
    w_masked = w * mask_w
    b = None
    b_masked = None
    if with_bias:
        b = get_parameter_or_create(
            "b", (outmaps, ), b_init, not fix_parameters)
        mask_b = get_parameter_or_create(
            "Mb", b.shape, ConstantInitializer(0.), False)
        b_masked = b * mask_b
    return F.convolution(inp, w_masked, b_masked, base_axis,
                         pad, stride, dilation, group)
def cnn_gradient_synthesizer(x, y=None, test=False):
    bs = x.shape[0]
    maps = x.shape[1]
    s0, s1 = x.shape[2:]
    if y is not None:
        h = F.one_hot(y, (10, ))
        h = F.reshape(h, (bs, 10, 1, 1))
        h = F.broadcast(h, (bs, 10, s0, s1))
        h = F.concatenate(*[x, h], axis=1)
    else:
        h = x

    with nn.parameter_scope("gs"):
        h = act_bn_conv(h, maps, test, name="conv0")
        w_init = ConstantInitializer(0)
        b_init = ConstantInitializer(0)
        g_pred = PF.convolution(h, maps, kernel=(3, 3), pad=(1, 1),
                                w_init=w_init, b_init=b_init, name="conv")
        g_pred.persistent = True
    return g_pred
def IN(inp, axes=[1], decay_rate=0.9, eps=1e-5, fix_parameters=True):
    """Instance Normalization
    """
    if inp.shape[0] == 1:
        return INByBatchNorm(inp, axes, decay_rate, eps, fix_parameters)

    b, c = inp.shape[0:2]
    spatial_shape = inp.shape[2:]
    shape_stat = [1 for _ in inp.shape]
    shape_stat[axes[0]] = inp.shape[axes[0]]

    beta = get_parameter_or_create(
        "beta", shape_stat, ConstantInitializer(0), not fix_parameters)
    gamma = get_parameter_or_create(
        "gamma", shape_stat, ConstantInitializer(1), not fix_parameters)

    # Instance normalization: normalize over the spatial dimension(s)
    axis = [i for i in range(len(inp.shape)) if i > 1]
    n_spatial = np.prod(spatial_shape)
    mean = F.sum(inp, axis=axis, keepdims=True) / n_spatial
    var = F.sum(F.pow_scalar(inp - mean, 2.0), axis=axis, keepdims=True) / n_spatial
    h = (inp - mean) / F.pow_scalar(var + eps, 0.5)

    return gamma * h + beta
def __init__(self, n_features, n_dims, axes=[1], decay_rate=0.9, eps=1e-5,
             batch_stat=True, output_stat=False, fix_parameters=False,
             param_init=None):
    assert len(axes) == 1
    shape_stat = [1 for _ in range(n_dims)]
    shape_stat[axes[0]] = n_features

    if param_init is None:
        param_init = {}
    beta_init = param_init.get('beta', ConstantInitializer(0))
    gamma_init = param_init.get('gamma', ConstantInitializer(1))
    mean_init = param_init.get('mean', ConstantInitializer(0))
    var_init = param_init.get('var', ConstantInitializer(1))

    beta = nn.Variable.from_numpy_array(
        beta_init(shape_stat)).apply(need_grad=not fix_parameters)
    gamma = nn.Variable.from_numpy_array(
        gamma_init(shape_stat)).apply(need_grad=not fix_parameters)
    mean = nn.Variable.from_numpy_array(mean_init(shape_stat))
    var = nn.Variable.from_numpy_array(var_init(shape_stat))

    self.beta = beta
    self.gamma = gamma
    self.mean = mean
    self.var = var
    self.axes = axes
    self.decay_rate = decay_rate
    self.eps = eps
    self.output_stat = output_stat