def dummy_parametric_function(shape, f=10, i=1, s="dummy"):
    """Doc"""
    from nnabla import Variable
    from nnabla.parameter import get_parameter_or_create
    from nnabla.initializer import UniformInitializer
    p1 = get_parameter_or_create("p1", shape, UniformInitializer((-1, 1)))
    p2 = get_parameter_or_create("p2", shape + (1, ), UniformInitializer((-1, 1)))
    return Variable(shape)

def _create_variable(v, name, shape):
    # Create and initialize variables
    class Variable:
        pass

    parameter = v.type == "Parameter"
    variable_instance = None
    if parameter:
        if v.initializer.type == 'Normal':
            initializer = NormalInitializer(v.initializer.multiplier)
        elif v.initializer.type == 'NormalAffineHe' or v.initializer.type == 'NormalAffineHeForward':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_forward(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalAffineHeBackward':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_backward(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalAffineGlorot':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_glorot(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalConvolutionHe' or v.initializer.type == 'NormalConvolutionHeForward':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_forward(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalConvolutionHeBackward':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_backward(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalConvolutionGlorot':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_glorot(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'Uniform':
            initializer = UniformInitializer(
                lim=[-v.initializer.multiplier, v.initializer.multiplier])
        elif v.initializer.type == 'UniformAffineGlorot':
            initializer = (lambda shape: UniformInitializer(calc_uniform_lim_glorot(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'UniformConvolutionGlorot':
            initializer = (lambda shape: UniformInitializer(calc_uniform_lim_glorot(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'Constant':
            initializer = ConstantInitializer(value=v.initializer.multiplier)
        else:
            initializer = None
        variable_instance = get_parameter_or_create(name, shape, initializer)
    else:
        # create empty variable, memory will be allocated in network.setup()
        # after network optimization
        variable_instance = nn.Variable()

    variable = Variable()
    variable.name = name
    variable.parameter = parameter
    variable.shape = shape
    variable.variable_instance = variable_instance
    return variable

def res_unit(x, scope_name, rng, dn=False, test=False):
    C = x.shape[1]
    with nn.parameter_scope(scope_name):
        # Conv -> BN -> Relu
        with nn.parameter_scope("conv1"):
            w_init = UniformInitializer(calc_uniform_lim_glorot(
                C, C // 2, kernel=(1, 1)), rng=rng)
            h = PF.convolution(x, C // 2, kernel=(1, 1), pad=(0, 0),
                               w_init=w_init, with_bias=False)
            h = PF.batch_normalization(h, batch_stat=not test)
            h = F.relu(h)
        # Conv -> BN -> Relu
        with nn.parameter_scope("conv2"):
            w_init = UniformInitializer(calc_uniform_lim_glorot(
                C // 2, C // 2, kernel=(3, 3)), rng=rng)
            h = PF.convolution(h, C // 2, kernel=(3, 3), pad=(1, 1),
                               w_init=w_init, with_bias=False)
            h = PF.batch_normalization(h, batch_stat=not test)
            h = F.relu(h)
        # Conv -> BN
        with nn.parameter_scope("conv3"):
            w_init = UniformInitializer(calc_uniform_lim_glorot(
                C // 2, C, kernel=(1, 1)), rng=rng)
            h = PF.convolution(h, C, kernel=(1, 1), pad=(0, 0),
                               w_init=w_init, with_bias=False)
            h = PF.batch_normalization(h, batch_stat=not test)
        # Residual -> Relu
        h = F.relu(h + x)
        # Maxpooling
        if dn:
            h = F.max_pooling(h, kernel=(2, 2), stride=(2, 2))
        return h

def convolution(inp, outmaps, kernel,
                pad=None, stride=None, dilation=None, group=1, itr=1,
                w_init=None, b_init=None,
                base_axis=1, fix_parameters=False, rng=None, with_bias=True,
                sn=True, test=False, init_scale=1.0):
    """Convolution layer with optional spectral normalization of the weight."""
    if w_init is None:
        l, u = calc_uniform_lim_glorot(
            inp.shape[base_axis], outmaps, tuple(kernel))
        l, u = init_scale * l, init_scale * u
        w_init = UniformInitializer((l, u), rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", (outmaps, inp.shape[base_axis] // group) + tuple(kernel),
        w_init, not fix_parameters)
    w_sn = spectral_normalization_for_conv(w, itr=itr, test=test) if sn else w
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", (outmaps, ), b_init, not fix_parameters)
    return F.convolution(inp, w_sn, b, base_axis, pad, stride, dilation, group)

def __init__(self, n_inmaps, n_outmaps, base_axis=1, w_init=None, b_init=None,
             fix_parameters=False, rng=None, with_bias=True):
    if not hasattr(n_outmaps, '__iter__'):
        n_outmaps = [n_outmaps]
    n_outmaps = list(n_outmaps)
    n_outmap = int(np.prod(n_outmaps))
    if w_init is None:
        w_init = UniformInitializer(calc_uniform_lim_glorot(
            n_inmaps, n_outmap), rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w_shape = (n_inmaps, n_outmap)
    w = nn.Variable.from_numpy_array(
        w_init(w_shape)).apply(need_grad=not fix_parameters)
    b = None
    if with_bias:
        b_shape = (n_outmap, )
        b = nn.Variable.from_numpy_array(
            b_init(b_shape)).apply(need_grad=not fix_parameters)
    self.W = w
    self.b = b
    self.base_axis = base_axis

def affine(inp, n_outmaps, base_axis=1, w_init=None, b_init=None, itr=1,
           fix_parameters=False, rng=None, with_bias=True, sn=True, test=False):
    """Affine layer with optional spectral normalization of the weight."""
    if not hasattr(n_outmaps, '__iter__'):
        n_outmaps = [n_outmaps]
    n_outmaps = list(n_outmaps)
    n_outmap = int(np.prod(n_outmaps))
    if w_init is None:
        inmaps = np.prod(inp.shape[base_axis:])
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(inmaps, n_outmap), rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps,
        w_init, not fix_parameters)
    w_sn = spectral_normalization_for_affine(
        w, itr=itr, test=test) if sn else w
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", n_outmaps, b_init, not fix_parameters)
    return F.affine(inp, w_sn, b, base_axis)

def __init__(self, in_features, out_features, base_axis=1,
             w_init=None, b_init=None, rng=None, bias=True, name=''):
    Module.__init__(self, name=name)
    self._scope_name = f'<linear at {hex(id(self))}>'
    if w_init is None:
        w_init = UniformInitializer(calc_uniform_lim_glorot(
            in_features, out_features), rng=rng)
    self._W = Parameter((in_features, out_features),
                        initializer=w_init, scope=self._scope_name)
    self._b = None
    if bias:
        if b_init is None:
            b_init = ConstantInitializer()
        self._b = Parameter((out_features, ),
                            initializer=b_init, scope=self._scope_name)
    self._base_axis = base_axis
    self._in_features = in_features
    self._out_features = out_features

def affine_norm(inputs, out_channels, base_axis, with_bias, w_init_gain,
                scope, **kargs):
    r"""Affine Layer.

    Args:
        inputs (nn.Variable): An input variable of shape (B, ...).
        out_channels (int): The number of output channels.
        base_axis (int): The base axis.
        with_bias (bool): Whether to use bias.
        w_init_gain (str): The non-linear function.
        scope (str): The parameter scope name.

    Returns:
        nn.Variable: An output variable.
    """
    with nn.parameter_scope(scope):
        lim = xavier_uniform_bound(inputs.shape, out_channels, kernel=(1, 1),
                                   base_axis=base_axis,
                                   nonlinearity=w_init_gain, is_affine=True)
        w_init = UniformInitializer(lim)
        return PF.affine(inputs, out_channels, base_axis=base_axis,
                         w_init=w_init, with_bias=with_bias, **kargs)

def embed(inp, n_inputs, n_features, initializer=None,
          fix_parameters=False, apply_w=None):
    """Embed.

    Embed slices a matrix/tensor with indexing array/tensor. Weights are
    initialized with :obj:`nnabla.initializer.UniformInitializer` within the
    range of :math:`-\\sqrt{3}` and :math:`\\sqrt{3}`.

    Args:
        inp (~nnabla.Variable): [Integer] Indices with shape
            :math:`(I_0, ..., I_N)`
        n_inputs : number of possible inputs, words or vocabularies
        n_features : number of embedding features
        initializer (~nnabla.initializer.BaseInitializer): Initializer for
            the weight. If `None`, the uniform initializer described above is used.
        fix_parameters (bool): When set to `True`, the embedding weight
            matrix will not be updated.
        apply_w (function): Lambda, function, or callable object applied to
            the weights.

    Returns:
        ~nnabla.Variable: Output with shape
        :math:`(I_0, ..., I_N, W_1, ..., W_M)`
    """
    if initializer is None:
        initializer = UniformInitializer((-np.sqrt(3.), np.sqrt(3)))
    w = get_parameter_or_create("W", [n_inputs, n_features],
                                initializer, True, not fix_parameters)
    if apply_w is not None:
        w = apply_w(w)
    return F.embed(inp, w)

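# Usage sketch (not from the source): calls the `embed` defined above under a
# hypothetical parameter scope; shapes and the scope name are illustrative only.
import nnabla as nn

idx = nn.Variable((2, 2))  # to be filled with integer indices in [0, 10)
with nn.parameter_scope("embed_example"):
    y = embed(idx, n_inputs=10, n_features=8)
# y.shape == (2, 2, 8); the weight "W" has shape (10, 8) and is drawn
# uniformly from [-sqrt(3), sqrt(3)].
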
def __init__(self, n_inputs, n_features, w_init=None, fix_parameters=False):
    if w_init is None:
        w_init = UniformInitializer((-np.sqrt(3.), np.sqrt(3)))
    w_shape = (n_inputs, n_features)
    w = nn.Variable.from_numpy_array(
        w_init(w_shape)).apply(need_grad=not fix_parameters)
    self.W = w

def __init__(self, inmaps, outmaps, kernel, pad=None, stride=None,
             dilation=None, group=1, w_init=None, b_init=None, base_axis=1,
             fix_parameters=False, rng=None, with_bias=True):
    if w_init is None:
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(inmaps, outmaps, tuple(kernel)), rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w_shape = (outmaps, inmaps // group) + tuple(kernel)
    w = nn.Variable.from_numpy_array(
        w_init(w_shape)).apply(need_grad=not fix_parameters)
    b = None
    if with_bias:
        b_shape = (outmaps, )
        b = nn.Variable.from_numpy_array(
            b_init(b_shape)).apply(need_grad=not fix_parameters)
    self.W = w
    self.b = b
    self.base_axis = base_axis
    self.pad = pad
    self.stride = stride
    self.dilation = dilation
    self.group = group

def test_get_parameter_with_initializer():
    """Testing with initializer
    """
    import nnabla as nn
    from nnabla.parameter import get_parameter_or_create
    nn.clear_parameters()

    rng = np.random.RandomState(seed=313)
    shape = (8, 8, 3, 3)

    # Instance inherited from BaseInitializer
    initializer = UniformInitializer(lim=(-1, 1), rng=rng)
    param1 = get_parameter_or_create(
        'param1', shape, initializer=initializer, need_grad=True)
    assert np.all(param1.d > -1) and np.all(param1.d < 1)

    # Numpy array
    initializer = rng.randn(*shape)
    param2 = get_parameter_or_create(
        'param2', initializer=initializer, need_grad=True)
    assert np.allclose(initializer, param2.d)

    # Random
    param3 = get_parameter_or_create('param3', shape, need_grad=True)

    nn.clear_parameters()

def inq_convolution(inp, outmaps, kernel,
                    pad=None, stride=None, dilation=None, group=1,
                    num_bits=4, inq_iterations=(), selection_algorithm='random',
                    seed=-1, w_init=None, i_init=None, b_init=None,
                    base_axis=1, fix_parameters=False, rng=None, with_bias=True):
    """Incremental Network Quantization Convolution Layer

    During training, the weights are sequentially quantized to power-of-two
    values, which allows the training of a multiplierless network.

    Using `inq_iterations`, one can specify after how many forward passes
    half of the learnable weights are fixed and quantized to powers-of-two.
    After reaching the last value in `inq_iterations`, all weights are fixed.

    For more details, please refer to the reference.

    Reference:
        Zhou A, Yao A, Guo Y, Xu L, Chen Y. Incremental network quantization:
        Towards lossless CNNs with low-precision weights.
        <https://arxiv.org/abs/1702.03044>

    Args:
        inp (~nnabla.Variable): N-D array.
        outmaps (int): Number of convolution kernels (which is equal to the
            number of output channels).
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel size.
        base_axis (int): Dimensions up to `base_axis` are treated as the
            sample dimensions.
        num_bits (int): Number of bits per weight. Value has to be larger
            than 1 as one bit is already used to code the value "0".
        inq_iterations (tuple of int): Tuple of iteration numbers at which we
            fix half of the weights.
        selection_algorithm (str): Chooses algorithm that is used to decide
            which weights are fixed. ("largest_abs" ... fix weights with
            largest absolute value, "random" ... fix weights randomly)
        seed (int): Random seed for INQ algorithm
        w_init (~nnabla.initializer.BaseInitializer): Initializer for the weight.
        i_init (~nnabla.initializer.BaseInitializer): Initializer for the
            indicators (0 ... learnable, 1 ... fixed).
        b_init (~nnabla.initializer.BaseInitializer): Initializer for the bias.
        fix_parameters (bool): When set to `True`, the weight and bias will
            not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`

    """
    if w_init is None:
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(inp.shape[base_axis], outmaps, tuple(kernel)), rng=rng)
    if i_init is None:
        i_init = ConstantInitializer()
    if b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", (outmaps, inp.shape[base_axis]) + tuple(kernel),
        w_init, not fix_parameters)
    i = get_parameter_or_create(
        "I", (outmaps, inp.shape[base_axis]) + tuple(kernel),
        i_init, False)
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", (outmaps,), b_init, not fix_parameters)
    return F.inq_convolution(inp, w, i, b, base_axis, pad, stride, dilation,
                             group, num_bits, inq_iterations,
                             selection_algorithm, seed)

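# Usage sketch (not from the source): builds one INQ convolution under a
# hypothetical scope; the iteration schedule and shapes are illustrative only.
import nnabla as nn

x = nn.Variable((4, 3, 32, 32))
with nn.parameter_scope("inq_conv_example"):
    y = inq_convolution(x, outmaps=16, kernel=(3, 3), pad=(1, 1),
                        inq_iterations=(1000, 2000, 3000),
                        selection_algorithm='largest_abs')
# At iterations 1000, 2000 and 3000 half of the remaining learnable weights
# are fixed to powers of two; the indicator parameter "I" tracks which ones.
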
def conv(inp, outmaps, kernel,
         pad=None, stride=None, dilation=None, group=1,
         w_init=None, b_init=None,
         base_axis=1, fix_parameters=False, rng=None, with_bias=True,
         use_wscale=True, use_he_backward=False):
    """Convolution layer with optional weight scaling (equalized learning rate)."""
    # Use He backward
    if use_he_backward:
        std = calc_normal_std_he_backward(
            inp.shape[base_axis], outmaps, kernel=kernel)
    else:
        std = calc_normal_std_he_forward(
            inp.shape[base_axis], outmaps, kernel=kernel)

    # W init
    if w_init is None and use_wscale:
        # Equalized Learning Rate
        w_init = NormalInitializer(1.)
        w = get_parameter_or_create(
            "W", (outmaps, inp.shape[base_axis] // group) + tuple(kernel),
            w_init, not fix_parameters)
        w *= std
    elif w_init is None and not use_wscale:
        w_init = NormalInitializer(std)
        w = get_parameter_or_create(
            "W", (outmaps, inp.shape[base_axis] // group) + tuple(kernel),
            w_init, not fix_parameters)
    else:
        if w_init is None:
            w_init = UniformInitializer(
                calc_uniform_lim_glorot(
                    inp.shape[base_axis], outmaps, tuple(kernel)), rng=rng)
        w = get_parameter_or_create(
            "W", (outmaps, inp.shape[base_axis] // group) + tuple(kernel),
            w_init, not fix_parameters)

    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", (outmaps, ), b_init, not fix_parameters)

    return F.convolution(inp, w, b, base_axis, pad, stride, dilation, group)

def __init__(self, in_channels, out_channels, kernel, pad=None, stride=None,
             dilation=None, group=1, w_init=None, b_init=None, base_axis=1,
             fix_parameters=False, rng=None, with_bias=True,
             channel_last=False, name=''):
    Module.__init__(self, name=name)
    self._scope_name = f'<convolution at {hex(id(self))}>'
    if w_init is None:
        w_init = UniformInitializer(calc_uniform_lim_glorot(
            in_channels, out_channels, tuple(kernel)), rng=rng)
    w_shape = (out_channels, in_channels // group) + tuple(kernel)
    b_shape = (out_channels, )
    self._b = None
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    if fix_parameters:
        self._W = nn.Variable.from_numpy_array(w_init(w_shape))
        if with_bias:
            self._b = nn.Variable.from_numpy_array(b_init(b_shape))
    else:
        self._W = Parameter(w_shape, initializer=w_init,
                            scope=self._scope_name)
        if with_bias:
            self._b = Parameter(b_shape, initializer=b_init,
                                scope=self._scope_name)
    self._base_axis = base_axis
    self._pad = pad
    self._stride = stride
    self._dilation = dilation
    self._group = group
    self._kernel = kernel
    self._in_channels = in_channels
    self._out_channels = out_channels
    self._channel_last = channel_last
    self._fix_parameters = fix_parameters
    self._rng = rng

def _get_generator(proto):
    if proto.type == 'Normal':
        return NormalInitializer(sigma=proto.multiplier)
    elif proto.type == 'Uniform':
        return UniformInitializer(lim=(-proto.multiplier, proto.multiplier))
    elif proto.type == 'Constant':
        return ConstantInitializer(value=proto.multiplier)
    else:
        raise ValueError('Generator type "' +
                         proto.type + '" is not supported.')

def convolution(inp, outmaps, kernel,
                pad=None, stride=None, dilation=None, group=1,
                w_init=None, b_init=None,
                base_axis=1, fix_parameters=False, rng=None, with_bias=True):
    """
    N-D Convolution with a bias term.

    For Dilated Convolution (a.k.a. Atrous Convolution), refer to:

    - Chen et al., DeepLab: Semantic Image Segmentation with Deep Convolutional
      Nets, Atrous Convolution, and Fully Connected CRFs.
      https://arxiv.org/abs/1606.00915

    - Yu et al., Multi-Scale Context Aggregation by Dilated Convolutions.
      https://arxiv.org/abs/1511.07122

    Args:
        inp (~nnabla.Variable): N-D array.
        outmaps (int): Number of convolution kernels (which is equal to the
            number of output channels). For example, to apply convolution on an
            input with 16 types of filters, specify 16.
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel size. For
            example, to apply convolution on an image with a 3 (height) by 5
            (width) two-dimensional kernel, specify (3,5).
        pad (:obj:`tuple` of :obj:`int`): Padding sizes for dimensions.
        stride (:obj:`tuple` of :obj:`int`): Stride sizes for dimensions.
        dilation (:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions.
        group (int): Number of groups of channels. This makes connections
            across channels more sparse by grouping connections along map direction.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        base_axis (int): Dimensions up to `base_axis` are treated as the
            sample dimensions.
        fix_parameters (bool): When set to `True`, the weights and biases will
            not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`: N-D array.

    """
    if w_init is None:
        w_init = UniformInitializer(calc_uniform_lim_glorot(
            inp.shape[base_axis], outmaps, tuple(kernel)), rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", (outmaps, inp.shape[base_axis] // group) + tuple(kernel),
        w_init, not fix_parameters)
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", (outmaps, ), b_init, not fix_parameters)
    return F.convolution(inp, w, b, base_axis, pad, stride, dilation, group)

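# Usage sketch (not from the source): a single 3x3 convolution on a batch of
# RGB images using the `convolution` defined above; the scope name and shapes
# are illustrative only.
import nnabla as nn

x = nn.Variable((4, 3, 32, 32))
with nn.parameter_scope("conv_example"):
    y = convolution(x, outmaps=16, kernel=(3, 3), pad=(1, 1))
# Registers "W" with shape (16, 3, 3, 3) and "b" with shape (16,);
# y.shape == (4, 16, 32, 32).
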
def call(self, inputs):
    r"""Encoder layer.

    Args:
        inputs (nn.Variable): An input variable of shape (B, T) indicating
            indices of character embeddings.

    Returns:
        nn.Variable: Output variable of shape (T, B, C).
    """
    hp = self._hparams
    with nn.parameter_scope('embeddings'):
        val = np.sqrt(6.0 / (len(hp.vocab) + hp.symbols_embedding_dim))
        inputs = PF.embed(
            inputs, n_inputs=len(hp.vocab),
            n_features=hp.symbols_embedding_dim,
            initializer=UniformInitializer(lim=(-val, val)))  # (B, T, C=512)

    with nn.parameter_scope('ngrams'):
        out = inputs
        for i in range(hp.encoder_n_convolutions):
            with nn.parameter_scope(f'filter_{i}'):
                out = conv_norm(out, out_channels=hp.encoder_embedding_dim,
                                kernel_size=hp.encoder_kernel_size,
                                padding=(hp.encoder_kernel_size - 1) // 2,
                                bias=False, stride=1, dilation=1,
                                w_init_gain='relu', scope='conv_norm',
                                channel_last=True)  # (B, C=512, T)
                out = PF.batch_normalization(
                    out, batch_stat=self.training, axes=[2])
                out = F.relu(out)
                if self.training:
                    # (B, C=512, T) --> (B, T, C=512)
                    out = F.dropout(out, 0.5)

    with nn.parameter_scope('lstm_encoder'):
        out = F.transpose(out, (1, 0, 2))
        h = F.constant(shape=(2, 2, hp.batch_size,
                              hp.encoder_embedding_dim // 2))
        c = F.constant(shape=(2, 2, hp.batch_size,
                              hp.encoder_embedding_dim // 2))
        out, _, _ = PF.lstm(out, h, c, training=self.training,
                            bidirectional=True)

    return out  # (T, B, C=512)

def affine(inp, n_outmaps,
           base_axis=1, w_init=None, b_init=None,
           fix_parameters=False, rng=None, with_bias=True,
           use_wscale=True, use_he_backward=False):
    """Affine layer with optional weight scaling (equalized learning rate)."""
    if not hasattr(n_outmaps, '__iter__'):
        n_outmaps = [n_outmaps]
    n_outmaps = list(n_outmaps)
    n_outmap = int(np.prod(n_outmaps))

    # Use He backward
    if use_he_backward:
        std = calc_normal_std_he_backward(inp.shape[base_axis], n_outmap)
    else:
        std = calc_normal_std_he_forward(inp.shape[base_axis], n_outmap)

    # W init
    if w_init is None and use_wscale:
        # Equalized Learning Rate
        w_init = NormalInitializer(1.)
        w = get_parameter_or_create(
            "W", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps,
            w_init, not fix_parameters)
        w *= std
    elif w_init is None and not use_wscale:
        w_init = NormalInitializer(std)
        w = get_parameter_or_create(
            "W", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps,
            w_init, not fix_parameters)
    else:
        if w_init is None:
            w_init = UniformInitializer(calc_uniform_lim_glorot(
                inp.shape[base_axis], n_outmaps), rng=rng)
        w = get_parameter_or_create(
            "W", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps,
            w_init, not fix_parameters)

    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", n_outmaps, b_init, not fix_parameters)

    return F.affine(inp, w, b, base_axis)

def test_param_name(p_name):
    import nnabla as nn

    rng = np.random.RandomState(seed=313)
    shape = (8, 8, 3, 3)
    pe_name = p_name.split('/')[-1]

    nn.clear_parameters()
    initializer = UniformInitializer(lim=(-1, 1), rng=rng)
    param1 = nn.parameter.get_parameter_or_create(
        p_name, shape, initializer=initializer, need_grad=True)
    assert param1.name == pe_name

def noisy_layer(x, out_size, name):
    inpt_size = x.shape[1]
    root_p = np.sqrt(inpt_size)
    mu_init = UniformInitializer((-1.0 / root_p, 1.0 / root_p))
    sig_init = ConstantInitializer(0.5 / root_p)
    eps_w, eps_b = sample_noise(inpt_size, out_size)
    with nn.parameter_scope(name):
        mu_w = get_parameter_or_create('mu_w', (inpt_size, out_size), mu_init)
        sig_w = get_parameter_or_create('sig_w', (inpt_size, out_size), sig_init)
        mu_b = get_parameter_or_create('mu_b', (out_size, ), mu_init)
        sig_b = get_parameter_or_create('sig_b', (out_size, ), sig_init)
        return F.affine(x, mu_w + sig_w * eps_w, mu_b + sig_b * eps_b)

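# Usage sketch (not from the source): applies the noisy affine layer above to a
# batch of feature vectors. It relies on the companion helper
# `sample_noise(in_size, out_size)` referenced in the function body; the scope
# name and shapes are illustrative only.
import nnabla as nn

x = nn.Variable((32, 128))
q = noisy_layer(x, out_size=64, name="noisy_fc1")
# q.shape == (32, 64); mu_w/sig_w have shape (128, 64), mu_b/sig_b shape (64,).
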
def embed(inp, n_inputs, n_features):
    """Embed.

    Embed slices a matrix/tensor with indexing array/tensor.

    Args:
        inp (~nnabla.Variable): [Integer] Indices with shape
            :math:`(I_0, ..., I_N)`
        n_inputs : number of possible inputs, words or vocabularies
        n_features : number of embedding features

    Returns:
        ~nnabla.Variable: Output with shape
        :math:`(I_0, ..., I_N, W_1, ..., W_M)`
    """
    w = get_parameter_or_create("W", [n_inputs, n_features],
                                UniformInitializer((-np.sqrt(3.), np.sqrt(3))),
                                True)
    return F.embed(inp, w)

def deconvolution(inp, outmaps, kernel,
                  pad=None, stride=None, dilation=None, group=1,
                  w_init=None, b_init=None,
                  base_axis=1, fix_parameters=False, rng=None, with_bias=True):
    """
    Deconvolution layer.

    Args:
        inp (~nnabla.Variable): N-D array.
        outmaps (int): Number of deconvolution kernels (which is equal to the
            number of output channels). For example, to apply deconvolution on
            an input with 16 types of filters, specify 16.
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel size. For
            example, to apply deconvolution on an image with a 3 (height) by 5
            (width) two-dimensional kernel, specify (3,5).
        pad (:obj:`tuple` of :obj:`int`): Padding sizes for dimensions.
        stride (:obj:`tuple` of :obj:`int`): Stride sizes for dimensions.
        dilation (:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions.
        group (int): Number of groups of channels. This makes connections
            across channels sparser by grouping connections along map direction.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        base_axis (int): Dimensions up to `base_axis` are treated as the
            sample dimensions.
        fix_parameters (bool): When set to `True`, the weights and biases will
            not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`: N-D array.

    """
    if w_init is None:
        w_init = UniformInitializer(calc_uniform_lim_glorot(
            outmaps, inp.shape[base_axis], tuple(kernel)), rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", (inp.shape[base_axis], outmaps // group) + tuple(kernel),
        w_init, not fix_parameters)
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", (outmaps, ), b_init, not fix_parameters)
    return F.deconvolution(inp, w, b, base_axis, pad, stride, dilation, group)

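# Usage sketch (not from the source): a 2x upsampling deconvolution under a
# hypothetical parameter scope; shapes are illustrative only.
import nnabla as nn

x = nn.Variable((4, 16, 8, 8))
with nn.parameter_scope("deconv_example"):
    y = deconvolution(x, outmaps=8, kernel=(2, 2), stride=(2, 2))
# "W" has shape (16, 8, 2, 2); y.shape == (4, 8, 16, 16).
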
def affine(inp, n_outmaps,
           base_axis=1, w_init=None, b_init=None,
           fix_parameters=False, rng=None, with_bias=True):
    """
    The affine layer, also known as the fully connected layer. Computes

    .. math::
        {\\mathbf y} = {\\mathbf A} {\\mathbf x} + {\\mathbf b}.

    where :math:`{\\mathbf x}, {\\mathbf y}` are the inputs and outputs
    respectively, and :math:`{\\mathbf A}, {\\mathbf b}` are constants.

    Args:
        inp (~nnabla.Variable): Input N-D array with shape
            (:math:`M_0 \\times \ldots \\times M_{B-1} \\times D_B \\times \ldots \\times D_N`).
            Dimensions before and after base_axis are flattened as if it is a matrix.
        n_outmaps (:obj:`int` or :obj:`tuple` of :obj:`int`): Number of output neurons per data.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        fix_parameters (bool): When set to `True`, the weights and biases will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`: :math:`(B + 1)`-D array.
        (:math:`M_0 \\times \ldots \\times M_{B-1} \\times L`)

    """
    if not hasattr(n_outmaps, '__iter__'):
        n_outmaps = [n_outmaps]
    n_outmaps = list(n_outmaps)
    n_outmap = int(np.prod(n_outmaps))
    if w_init is None:
        inmaps = np.prod(inp.shape[base_axis:])
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(inmaps, n_outmap), rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps,
        w_init, not fix_parameters)
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", n_outmaps, b_init, not fix_parameters)
    return F.affine(inp, w, b, base_axis)

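# Usage sketch (not from the source): flattens everything after base_axis=1 and
# applies the fully connected layer above under a hypothetical scope.
import nnabla as nn

x = nn.Variable((8, 3, 32, 32))
with nn.parameter_scope("fc_example"):
    y = affine(x, n_outmaps=100)
# "W" has shape (3*32*32, 100), "b" has shape (100,); y.shape == (8, 100).
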
def discriminator(x, y, scopename="discriminator",
                  maps=64, n_classes=1000, s=4, test=False, sn=True):
    with nn.parameter_scope(scopename):
        # Resblocks
        h = optblock_d(x, y, "block-1", n_classes, maps * 1, test=test, sn=sn)
        h = resblock_d(h, y, "block-2", n_classes, maps * 2, test=test, sn=sn)
        h = attnblock(h, sn=sn, test=test)
        h = resblock_d(h, y, "block-3", n_classes, maps * 4, test=test, sn=sn)
        h = resblock_d(h, y, "block-4", n_classes, maps * 8, test=test, sn=sn)
        h = resblock_d(h, y, "block-5", n_classes, maps * 16, test=test, sn=sn)
        h = resblock_d(h, y, "block-6", n_classes, maps * 16,
                       downsample=False, test=test, sn=sn)
        # Last affine
        # h = F.leaky_relu(h, 0.2)
        h = F.relu(h)
        h = F.sum(h, axis=(2, 3))
        o0 = affine(h, 1, sn=sn, test=test)
        # Project discriminator
        l, u = calc_uniform_lim_glorot(n_classes, maps * 16)
        e = embed(y, n_classes, maps * 16,
                  initializer=UniformInitializer((l, u)),
                  name="projection", sn=sn, test=test)
        o1 = F.sum(h * e, axis=1, keepdims=True)
        return o0 + o1

def masked_convolution(inp, outmaps, kernel,
                       pad=None, stride=None, dilation=None, group=1,
                       w_init=None, b_init=None,
                       base_axis=1, fix_parameters=False, rng=None, with_bias=True):
    """Convolution layer whose weight and bias are multiplied by binary masks ("Mw", "Mb")."""
    if w_init is None:
        w_init = UniformInitializer(calc_uniform_lim_glorot(
            inp.shape[base_axis], outmaps, tuple(kernel)), rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", (outmaps, inp.shape[base_axis] // group) + tuple(kernel),
        w_init, not fix_parameters)
    mask_w = get_parameter_or_create(
        "Mw", w.shape, ConstantInitializer(0.), False)
    w_masked = w * mask_w
    b = None
    b_masked = None
    if with_bias:
        b = get_parameter_or_create(
            "b", (outmaps, ), b_init, not fix_parameters)
        mask_b = get_parameter_or_create(
            "Mb", b.shape, ConstantInitializer(0.), False)
        b_masked = b * mask_b
    return F.convolution(inp, w_masked, b_masked,
                         base_axis, pad, stride, dilation, group)

def conv_norm(inputs, out_channels, kernel_size, stride, padding,
              dilation, bias, w_init_gain, scope, **kargs):
    r"""1D convolutional layer.

    Args:
        inputs (nn.Variable): An input variable of shape (B, C, T).
        out_channels (int): The number of output channels.
        kernel_size (int): The kernel size.
        stride (int): The stride.
        padding (int): The number of paddings.
        dilation (int): The dilation.
        bias (bool): Whether bias is used.
        w_init_gain (str): The non-linear function.
        scope (str): The parameter scope name.

    Returns:
        nn.Variable: An output variable.
    """
    with nn.parameter_scope(scope):
        base_axis = len(inputs.shape) - 1 if kargs.get('channel_last', False) else 1
        lim = xavier_uniform_bound(inputs.shape, out_channels, (kernel_size, ),
                                   base_axis, nonlinearity=w_init_gain,
                                   is_affine=False)
        w_init = UniformInitializer(lim)
        out = PF.convolution(inputs, out_channels, kernel=(kernel_size, ),
                             stride=(stride, ), pad=(padding, ),
                             w_init=w_init, dilation=(dilation, ),
                             with_bias=bias, **kargs)
    return out

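# Usage sketch (not from the source): a 1D convolution over an 80-channel
# spectrogram-like input. It assumes the helper `xavier_uniform_bound` from the
# same module is importable; shapes are illustrative only.
import nnabla as nn

x = nn.Variable((2, 80, 100))  # (B, C, T)
y = conv_norm(x, out_channels=512, kernel_size=5, stride=1, padding=2,
              dilation=1, bias=False, w_init_gain='relu', scope='conv_norm')
# y.shape == (2, 512, 100)
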
def cifar10_resnet23_prediction(image, ctx, test=False):
    """
    Construct ResNet 23
    """
    # Residual Unit
    def res_unit(x, scope_name, rng, dn=False, test=False):
        C = x.shape[1]
        with nn.parameter_scope(scope_name):
            # Conv -> BN -> Relu
            with nn.parameter_scope("conv1"):
                w_init = UniformInitializer(calc_uniform_lim_glorot(
                    C, C // 2, kernel=(1, 1)), rng=rng)
                h = PF.convolution(x, C // 2, kernel=(1, 1), pad=(0, 0),
                                   w_init=w_init, with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN -> Relu
            with nn.parameter_scope("conv2"):
                w_init = UniformInitializer(calc_uniform_lim_glorot(
                    C // 2, C // 2, kernel=(3, 3)), rng=rng)
                h = PF.convolution(h, C // 2, kernel=(3, 3), pad=(1, 1),
                                   w_init=w_init, with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN
            with nn.parameter_scope("conv3"):
                w_init = UniformInitializer(calc_uniform_lim_glorot(
                    C // 2, C, kernel=(1, 1)), rng=rng)
                h = PF.convolution(h, C, kernel=(1, 1), pad=(0, 0),
                                   w_init=w_init, with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
            # Residual -> Relu
            h = F.relu(h + x)
            # Maxpooling
            if dn:
                h = F.max_pooling(h, kernel=(2, 2), stride=(2, 2))
            return h

    # Random generator for using the same init parameters in all devices
    rng = np.random.RandomState(0)
    nmaps = 64
    ncls = 10

    # Conv -> BN -> Relu
    with nn.context_scope(ctx):
        with nn.parameter_scope("conv1"):
            # Preprocess
            if not test:
                image = F.image_augmentation(image, contrast=1.0,
                                              angle=0.25, flip_lr=True)
                image.need_grad = False
            w_init = UniformInitializer(
                calc_uniform_lim_glorot(3, nmaps, kernel=(3, 3)), rng=rng)
            h = PF.convolution(image, nmaps, kernel=(3, 3), pad=(1, 1),
                               w_init=w_init, with_bias=False)
            h = PF.batch_normalization(h, batch_stat=not test)
            h = F.relu(h)

        h = res_unit(h, "conv2", rng, False)     # -> 32x32
        h = res_unit(h, "conv3", rng, True)      # -> 16x16
        h = res_unit(h, "conv4", rng, False)     # -> 16x16
        h = res_unit(h, "conv5", rng, True)      # -> 8x8
        h = res_unit(h, "conv6", rng, False)     # -> 8x8
        h = res_unit(h, "conv7", rng, True)      # -> 4x4
        h = res_unit(h, "conv8", rng, False)     # -> 4x4
        h = F.average_pooling(h, kernel=(4, 4))  # -> 1x1
        w_init = UniformInitializer(calc_uniform_lim_glorot(
            int(np.prod(h.shape[1:])), ncls, kernel=(1, 1)), rng=rng)
        pred = PF.affine(h, ncls, w_init=w_init)

    return pred

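# Usage sketch (not from the source): builds the CIFAR-10 ResNet-23 graph on
# the CPU extension context; the batch size is illustrative only.
import nnabla as nn
from nnabla.ext_utils import get_extension_context

ctx = get_extension_context('cpu')
image = nn.Variable((64, 3, 32, 32))
pred = cifar10_resnet23_prediction(image, ctx, test=False)
# pred.shape == (64, 10)
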
def binary_connect_convolution(inp, outmaps, kernel,
                               pad=None, stride=None, dilation=None, group=1,
                               w_init=None, wb_init=None, b_init=None,
                               base_axis=1, fix_parameters=False, rng=None,
                               with_bias=True):
    """Binary Connect Convolution, multiplier-less inner-product.

    Binary Connect Convolution is the convolution function, except the
    definition of the inner product is modified. The input-output relation
    of this function is as follows:

    .. math::

        y_{n, a, b} = \sum_{m} \sum_{i} \sum_{j} sign(w_{n, m, i, j}) x_{m, a + i, b + j}.

    Therefore :math:`sign(w_i)` is either :math:`1` or :math:`-1` and the
    inner product simplifies to addition.

    This function should be used together with BatchNormalization.

    References:

        M. Courbariaux, Y. Bengio, and J.-P. David. "BinaryConnect:
        Training Deep Neural Networks with binary weights during propagations."
        Advances in Neural Information Processing Systems. 2015.

    .. note::

        1) if you would like to share weights between some layers, please
        make sure to share the standard, floating value weights (`weight`)
        and not the binarized weights (`binary_weight`)

        2) The weights and the binary weights become synced only after
        :func:`~nnabla._variable.Variable.forward` is called, and not after a
        call to :func:`~nnabla._variable.Variable.backward`. To access the
        parameters of the network, remember to call
        :func:`~nnabla._variable.Variable.forward` once before doing so,
        otherwise the float weights and the binary weights will not be in sync.

        3) CPU and GPU implementations now use float value for
        `binary_weight`, since this function is only for simulation purposes.

    Args:
        inp (~nnabla.Variable): N-D array.
        outmaps (int): Number of convolution kernels (which is equal to the
            number of output channels). For example, to apply convolution on an
            input with 16 types of filters, specify 16.
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel size. For
            example, to apply convolution on an image with a 3 (height) by 5
            (width) two-dimensional kernel, specify (3,5).
        pad (:obj:`tuple` of :obj:`int`): Padding sizes for dimensions.
        stride (:obj:`tuple` of :obj:`int`): Stride sizes for dimensions.
        dilation (:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions.
        group (int): Number of groups of channels. This makes connections
            across channels sparser by grouping connections along map direction.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        wb_init (~nnabla.initializer.BaseInitializer): Initializer for binary weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        base_axis (int): Dimensions up to `base_axis` are treated as the
            sample dimensions.
        fix_parameters (bool): When set to `True`, the weights and biases
            will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`

    """
    if w_init is None:
        w_init = UniformInitializer(calc_uniform_lim_glorot(
            inp.shape[base_axis], outmaps, tuple(kernel)), rng=rng)
    if wb_init is None:
        wb_init = UniformInitializer(calc_uniform_lim_glorot(
            inp.shape[base_axis], outmaps, tuple(kernel)), rng=rng)
    if b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", (outmaps, inp.shape[base_axis]) + tuple(kernel),
        w_init, not fix_parameters)
    wb = get_parameter_or_create(
        "Wb", (outmaps, inp.shape[base_axis]) + tuple(kernel),
        wb_init, not fix_parameters)
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", (outmaps, ), b_init, not fix_parameters)
    return F.binary_connect_convolution(
        inp, w, wb, b, base_axis, pad, stride, dilation, group)

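# Usage sketch (not from the source): one binary-connect convolution under a
# hypothetical scope; shapes are illustrative only. As noted in the docstring,
# call forward() once before reading parameters so "W" and "Wb" are in sync.
import nnabla as nn

x = nn.Variable((4, 3, 32, 32))
with nn.parameter_scope("bc_conv_example"):
    y = binary_connect_convolution(x, outmaps=16, kernel=(3, 3), pad=(1, 1))
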
def binary_weight_convolution(inp, outmaps, kernel,
                              pad=None, stride=None, dilation=None, group=1,
                              w_init=None, wb_init=None, b_init=None,
                              base_axis=1, fix_parameters=False, rng=None,
                              with_bias=True):
    """Binary Weight Convolution, multiplier-less inner-product with a scale factor.

    Binary Weight Convolution is the convolution function, but the inner
    product in this function is the following,

    .. math::

        y_{n, a, b} = \\frac{1}{\\|\\mathbf{w}_n\\|_{\\ell_1}} \sum_{m} \sum_{i} \sum_{j} sign(w_{n, m, i, j}) x_{m, a + i, b + j}.

    Therefore :math:`sign(w_{n, m, i, j})` is either :math:`1` or :math:`-1`
    and the inner product simplifies to addition followed by scaling factor
    :math:`\\alpha = \\frac{1}{\\|\\mathbf{w}_n\\|_{\\ell_1}}`. The number of
    :math:`n` is the number of outmaps of the convolution function.

    References:

        Rastegari, Mohammad, et al. "XNOR-Net: ImageNet Classification Using
        Binary Convolutional Neural Networks." arXiv preprint
        arXiv:1603.05279 (2016).

    .. note::

        1) if you would like to share weights between some layers, please
        make sure to share the standard, floating value weights (`weight`)
        and not the binarized weights (`binary_weight`)

        2) The weights and the binary weights become synced only after
        :func:`~nnabla._variable.Variable.forward` is called, and not after a
        call to :func:`~nnabla._variable.Variable.backward`. To access the
        parameters of the network, remember to call
        :func:`~nnabla._variable.Variable.forward` once before doing so,
        otherwise the float weights and the binary weights will not be in sync.

        3) CPU and GPU implementations now use float value for
        `binary_weight`, since this function is only for simulation purposes.

    Args:
        inp (~nnabla.Variable): N-D array.
        outmaps (int): Number of convolution kernels (which is equal to the
            number of output channels). For example, to apply convolution on an
            input with 16 types of filters, specify 16.
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel size. For
            example, to apply convolution on an image with a 3 (height) by 5
            (width) two-dimensional kernel, specify (3,5).
        pad (:obj:`tuple` of :obj:`int`): Padding sizes for dimensions.
        stride (:obj:`tuple` of :obj:`int`): Stride sizes for dimensions.
        dilation (:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions.
        group (int): Number of groups of channels. This makes connections
            across channels sparser by grouping connections along map direction.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        wb_init (~nnabla.initializer.BaseInitializer): Initializer for binary weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        base_axis (int): Dimensions up to `base_axis` are treated as the
            sample dimensions.
        fix_parameters (bool): When set to `True`, the weights and biases
            will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`

    """
    if w_init is None:
        w_init = UniformInitializer(calc_uniform_lim_glorot(
            inp.shape[base_axis], outmaps, tuple(kernel)), rng=rng)
    if wb_init is None:
        wb_init = UniformInitializer(calc_uniform_lim_glorot(
            inp.shape[base_axis], outmaps, tuple(kernel)), rng=rng)
    if b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", (outmaps, inp.shape[base_axis]) + tuple(kernel),
        w_init, not fix_parameters)
    wb = get_parameter_or_create(
        "Wb", (outmaps, inp.shape[base_axis]) + tuple(kernel),
        wb_init, not fix_parameters)
    alpha = get_parameter_or_create(
        "alpha", (outmaps, ), ConstantInitializer(0), False)
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", (outmaps, ), b_init, not fix_parameters)
    return F.binary_weight_convolution(
        inp, w, wb, alpha, b, base_axis, pad, stride, dilation, group)

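# Usage sketch (not from the source): one binary-weight (XNOR-style) convolution
# under a hypothetical scope; shapes are illustrative only. The per-outmap scale
# "alpha" is created alongside "W" and "Wb".
import nnabla as nn

x = nn.Variable((4, 3, 32, 32))
with nn.parameter_scope("bw_conv_example"):
    y = binary_weight_convolution(x, outmaps=16, kernel=(3, 3), pad=(1, 1))
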