Example #1
def dummy_parametric_function(shape, f=10, i=1, s="dummy"):
    """Doc"""
    from nnabla import Variable
    from nnabla.parameter import get_parameter_or_create
    from nnabla.initializer import UniformInitializer
    p1 = get_parameter_or_create("p1", shape, UniformInitializer((-1, 1)))
    p2 = get_parameter_or_create("p2", shape + (1, ),
                                 UniformInitializer((-1, 1)))
    return Variable(shape)
Example #2
def _create_variable(v, name, shape):
    # Create and initialize variables
    class Variable:
        pass

    parameter = v.type == "Parameter"
    variable_instance = None
    if parameter:
        if v.initializer.type == 'Normal':
            initializer = NormalInitializer(v.initializer.multiplier)
        elif v.initializer.type == 'NormalAffineHe' or v.initializer.type == 'NormalAffineHeForward':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_forward(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalAffineHeBackward':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_backward(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalAffineGlorot':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_glorot(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalConvolutionHe' or v.initializer.type == 'NormalConvolutionHeForward':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_forward(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalConvolutionHeBackward':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_backward(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalConvolutionGlorot':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_glorot(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'Uniform':
            initializer = UniformInitializer(
                lim=[-v.initializer.multiplier, v.initializer.multiplier])
        elif v.initializer.type == 'UniformAffineGlorot':
            initializer = (lambda shape: UniformInitializer(calc_uniform_lim_glorot(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'UniformConvolutionGlorot':
            initializer = (lambda shape: UniformInitializer(calc_uniform_lim_glorot(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'Constant':
            initializer = ConstantInitializer(value=v.initializer.multiplier)
        else:
            initializer = None
        variable_instance = get_parameter_or_create(name, shape, initializer)
    else:
        # create empty variable, memory will be allocated in network.setup()
        # after network optimization
        variable_instance = nn.Variable()

    variable = Variable()
    variable.name = name
    variable.parameter = parameter
    variable.shape = shape
    variable.variable_instance = variable_instance

    return variable
Example #3
    def res_unit(x, scope_name, rng, dn=False, test=False):
        C = x.shape[1]
        with nn.parameter_scope(scope_name):

            # Conv -> BN -> Relu
            with nn.parameter_scope("conv1"):
                w_init = UniformInitializer(calc_uniform_lim_glorot(
                    C, C // 2, kernel=(1, 1)),
                                            rng=rng)
                h = PF.convolution(x,
                                   C // 2,
                                   kernel=(1, 1),
                                   pad=(0, 0),
                                   w_init=w_init,
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN -> Relu
            with nn.parameter_scope("conv2"):
                w_init = UniformInitializer(calc_uniform_lim_glorot(
                    C // 2, C // 2, kernel=(3, 3)),
                                            rng=rng)
                h = PF.convolution(h,
                                   C // 2,
                                   kernel=(3, 3),
                                   pad=(1, 1),
                                   w_init=w_init,
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN
            with nn.parameter_scope("conv3"):
                w_init = UniformInitializer(calc_uniform_lim_glorot(
                    C // 2, C, kernel=(1, 1)),
                                            rng=rng)
                h = PF.convolution(h,
                                   C,
                                   kernel=(1, 1),
                                   pad=(0, 0),
                                   w_init=w_init,
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
            # Residual -> Relu
            h = F.relu(h + x)

            # Maxpooling
            if dn:
                h = F.max_pooling(h, kernel=(2, 2), stride=(2, 2))

            return h
Example #4
def convolution(inp,
                outmaps,
                kernel,
                pad=None,
                stride=None,
                dilation=None,
                group=1,
                itr=1,
                w_init=None,
                b_init=None,
                base_axis=1,
                fix_parameters=False,
                rng=None,
                with_bias=True,
                sn=True,
                test=False,
                init_scale=1.0):
    """
    """
    if w_init is None:
        l, u = calc_uniform_lim_glorot(inp.shape[base_axis], outmaps,
                                       tuple(kernel))
        l, u = init_scale * l, init_scale * u
        w_init = UniformInitializer((l, u), rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create("W", (outmaps, inp.shape[base_axis] // group) +
                                tuple(kernel), w_init, not fix_parameters)
    w_sn = spectral_normalization_for_conv(w, itr=itr, test=test) if sn else w
    b = None
    if with_bias:
        b = get_parameter_or_create("b", (outmaps, ), b_init,
                                    not fix_parameters)
    return F.convolution(inp, w_sn, b, base_axis, pad, stride, dilation, group)
Example #5
    def __init__(self,
                 n_inmaps,
                 n_outmaps,
                 base_axis=1,
                 w_init=None,
                 b_init=None,
                 fix_parameters=False,
                 rng=None,
                 with_bias=True):
        if not hasattr(n_outmaps, '__iter__'):
            n_outmaps = [n_outmaps]
        n_outmaps = list(n_outmaps)
        n_outmap = int(np.prod(n_outmaps))
        if w_init is None:
            w_init = UniformInitializer(calc_uniform_lim_glorot(
                n_inmaps, n_outmap),
                                        rng=rng)
        if with_bias and b_init is None:
            b_init = ConstantInitializer()
        w_shape = (n_inmaps, n_outmap)
        w = nn.Variable.from_numpy_array(
            w_init(w_shape)).apply(need_grad=not fix_parameters)
        b = None
        if with_bias:
            b_shape = (n_outmap, )
            b = nn.Variable.from_numpy_array(
                b_init(b_shape)).apply(need_grad=not fix_parameters)

        self.W = w
        self.b = b
        self.base_axis = base_axis
Example #6
def affine(inp, n_outmaps,
           base_axis=1,
           w_init=None, b_init=None,
           itr=1,
           fix_parameters=False, rng=None, with_bias=True,
           sn=True, test=False):
    """
    """
    if not hasattr(n_outmaps, '__iter__'):
        n_outmaps = [n_outmaps]
    n_outmaps = list(n_outmaps)
    n_outmap = int(np.prod(n_outmaps))
    if w_init is None:
        inmaps = np.prod(inp.shape[base_axis:])
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(inmaps, n_outmap), rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps,
        w_init, not fix_parameters)
    w_sn = spectral_normalization_for_affine(
        w, itr=itr, test=test) if sn else w
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", n_outmaps, b_init, not fix_parameters)
    return F.affine(inp, w_sn, b, base_axis)
Example #7
    def __init__(self,
                 in_features,
                 out_features,
                 base_axis=1,
                 w_init=None,
                 b_init=None,
                 rng=None,
                 bias=True,
                 name=''):
        Module.__init__(self, name=name)
        self._scope_name = f'<linear at {hex(id(self))}>'

        if w_init is None:
            w_init = UniformInitializer(calc_uniform_lim_glorot(
                in_features, out_features),
                                        rng=rng)
        self._W = Parameter((in_features, out_features),
                            initializer=w_init,
                            scope=self._scope_name)
        self._b = None

        if bias:
            if b_init is None:
                b_init = ConstantInitializer()
            self._b = Parameter((out_features, ),
                                initializer=b_init,
                                scope=self._scope_name)

        self._base_axis = base_axis
        self._in_features = in_features
        self._out_features = out_features
Example #8
def affine_norm(inputs, out_channels, base_axis, with_bias, w_init_gain, scope,
                **kargs):
    r"""Affine Layer.

    Args:
        inputs (nn.Variable): An input variable of shape (B,...)
        out_channels (int): The number of output channels.
        base_axis (int): The base axis.
        with_bias (bool): Whether to use bias.
        w_init_gain (str): The non-linear function.
        scope (str): The parameter scope name.

    Returns:
        nn.Variable: An output variable.
    """
    with nn.parameter_scope(scope):
        lim = xavier_uniform_bound(inputs.shape,
                                   out_channels,
                                   kernel=(1, 1),
                                   base_axis=base_axis,
                                   nonlinearity=w_init_gain,
                                   is_affine=True)
        w_init = UniformInitializer(lim)
        return PF.affine(inputs,
                         out_channels,
                         base_axis=base_axis,
                         w_init=w_init,
                         with_bias=with_bias,
                         **kargs)
Example #9
def embed(inp,
          n_inputs,
          n_features,
          initializer=None,
          fix_parameters=False,
          apply_w=None):
    """ Embed.

    Embed slices a matrix/tensor with indexing array/tensor. Weights are
    initialized with :obj:`nnabla.initializer.UniformInitializer` in the
    range :math:`[-\\sqrt{3}, \\sqrt{3}]`.

    Args:
        inp (~nnabla.Variable): [Integer] Indices with shape
            :math:`(I_0, ..., I_N)`
        n_inputs : number of possible inputs, words or vocabularies
        n_features : number of embedding features
        fix_parameters (bool): When set to `True`, the embedding weight matrix
            will not be updated.
        apply_w (function): Lambda, function, or callable object applied to
            the weights.

    Returns:
        ~nnabla.Variable: Output with shape
            :math:`(I_0, ..., I_N, W_1, ..., W_M)`
    """
    if initializer is None:
        initializer = UniformInitializer((-np.sqrt(3.), np.sqrt(3)))
    w = get_parameter_or_create("W", [n_inputs, n_features], initializer, True,
                                not fix_parameters)
    if apply_w is not None:
        w = apply_w(w)
    return F.embed(inp, w)
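A minimal usage sketch of this pattern through nnabla's built-in PF.embed; the vocabulary and feature sizes below are illustrative assumptions, not values from the example.

import numpy as np
import nnabla as nn
import nnabla.parametric_functions as PF

nn.clear_parameters()
# (B=2, T=3) integer indices into a hypothetical 100-word vocabulary.
idx = nn.Variable.from_numpy_array(np.array([[1, 4, 2], [0, 7, 9]]))
emb = PF.embed(idx, n_inputs=100, n_features=16)  # weights ~ U(-sqrt(3), sqrt(3))
emb.forward()
print(emb.d.shape)  # (2, 3, 16)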
Example #10
    def __init__(self, n_inputs, n_features, w_init=None, fix_parameters=False):
        if w_init is None:
            w_init = UniformInitializer((-np.sqrt(3.), np.sqrt(3.)))
        w_shape = (n_inputs, n_features)
        w = nn.Variable.from_numpy_array(
            w_init(w_shape)).apply(need_grad=not fix_parameters)
        self.W = w
Example #11
    def __init__(self, inmaps, outmaps, kernel,
                 pad=None, stride=None, dilation=None, group=1,
                 w_init=None, b_init=None,
                 base_axis=1, fix_parameters=False, rng=None, with_bias=True):
        if w_init is None:
            w_init = UniformInitializer(
                calc_uniform_lim_glorot(inmaps, outmaps, tuple(kernel)), rng=rng)
        if with_bias and b_init is None:
            b_init = ConstantInitializer()
        w_shape = (outmaps, inmaps // group) + tuple(kernel)
        w = nn.Variable.from_numpy_array(
            w_init(w_shape)).apply(need_grad=not fix_parameters)
        b = None
        if with_bias:
            b_shape = (outmaps, )
            b = nn.Variable.from_numpy_array(
                b_init(b_shape)).apply(need_grad=not fix_parameters)

        self.W = w
        self.b = b
        self.base_axis = base_axis
        self.pad = pad
        self.stride = stride
        self.dilation = dilation
        self.group = group
Example #12
def test_get_parameter_with_initializer():
    """Testing with initializer
    """
    import nnabla as nn
    from nnabla.parameter import get_parameter_or_create
    nn.clear_parameters()
    rng = np.random.RandomState(seed=313)
    shape = (8, 8, 3, 3)

    # Instance inherited from BaseInitializer
    initializer = UniformInitializer(lim=(-1, 1), rng=rng)
    param1 = get_parameter_or_create('param1',
                                     shape,
                                     initializer=initializer,
                                     need_grad=True)
    assert np.all(param1.d > -1) and np.all(param1.d < 1)

    # Numpy array
    initializer = rng.randn(*shape)
    param2 = get_parameter_or_create('param2',
                                     initializer=initializer,
                                     need_grad=True)
    assert np.allclose(initializer, param2.d)

    # Random
    param3 = get_parameter_or_create('param3', shape, need_grad=True)

    nn.clear_parameters()
Example #13
def inq_convolution(inp, outmaps, kernel,
                    pad=None, stride=None, dilation=None, group=1,
                    num_bits=4, inq_iterations=(), selection_algorithm='random',
                    seed=-1, w_init=None, i_init=None, b_init=None,
                    base_axis=1, fix_parameters=False, rng=None,
                    with_bias=True):
    """Incremental Network Quantization Convolution Layer

    During training, the weights are sequentially quantized to power-of-two
    values, which allows the training of a multiplierless network.

    Using `inq_iterations`, one can specify after how many forward passes
    half of the learnable weights are fixed and quantized to powers-of-two.
    After reaching the last value in `inq_iterations`, all weights are fixed.

    For more details, please refer to the reference.

    Reference:
    Zhou A, Yao A, Guo Y, Xu L, Chen Y. Incremental network quantization:
    Towards lossless CNNs with low-precision weights.
    <https://arxiv.org/abs/1702.03044>

    Args:
        inp (~nnabla.Variable): Input N-D array with shape (:math:`M_0 \\times \ldots \\times M_{B-1} \\times D_B \\times \ldots \\times D_N`). Dimensions before and after base_axis are flattened as if it was a matrix.
        outmaps (int): Number of convolution kernels (which is equal to the number of output channels).
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel size.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        num_bits (int): Number of bits per weight. Value has to be larger than 1 as one bit is already used to code the value "0"
        inq_iterations (tuple of int): Tuple of iteration numbers at which we fix half of the weights.
        selection_algorithm (str): Chooses algorithm that is used to decide which weights are fixed. ("largest_abs" ... fix weights with largest absolute value, "random" ... fix weights randomly)
        seed (int): Random seed for INQ algorithm
        w_init (~nnabla.initializer.BaseInitializer): Initializer for the weight.
        i_init (~nnabla.initializer.BaseInitializer): Initializer for the indicators (0 ... learnable, 1 ... fixed).
        b_init (~nnabla.initializer.BaseInitializer): Initializer for the bias.
        fix_parameters (bool): When set to `True`, the weight and bias will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`

    """
    if w_init is None:
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(inp.shape[base_axis], outmaps, tuple(kernel)), rng=rng)
    if i_init is None:
        i_init = ConstantInitializer()
    if b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", (outmaps, inp.shape[base_axis]) + tuple(kernel),
        w_init, not fix_parameters)
    i = get_parameter_or_create(
        "I", (outmaps, inp.shape[base_axis]) + tuple(kernel),
        i_init, False)
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", (outmaps,), b_init, not fix_parameters)
    return F.inq_convolution(inp, w, i, b, base_axis, pad, stride, dilation, group, num_bits, inq_iterations, selection_algorithm, seed)
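A hedged usage sketch of the INQ layer via nnabla's parametric function of the same name; the iteration schedule and sizes are illustrative assumptions.

import nnabla as nn
import nnabla.parametric_functions as PF

nn.clear_parameters()
x = nn.Variable((4, 3, 32, 32))
with nn.parameter_scope("inq_conv1"):
    # Quantize/fix half of the remaining weights after 1000 and 2000 iterations.
    h = PF.inq_convolution(x, 16, kernel=(3, 3), pad=(1, 1),
                           num_bits=4, inq_iterations=(1000, 2000),
                           selection_algorithm='largest_abs')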
Example #14
def conv(inp,
         outmaps,
         kernel,
         pad=None,
         stride=None,
         dilation=None,
         group=1,
         w_init=None,
         b_init=None,
         base_axis=1,
         fix_parameters=False,
         rng=None,
         with_bias=True,
         use_wscale=True,
         use_he_backward=False):
    """
    """
    # Use He backward
    if use_he_backward:
        std = calc_normal_std_he_backward(inp.shape[base_axis],
                                          outmaps,
                                          kernel=kernel)
    else:
        std = calc_normal_std_he_forward(inp.shape[base_axis],
                                         outmaps,
                                         kernel=kernel)

    # W init
    if w_init is None and use_wscale:
        # Equalized Learning Rate
        w_init = NormalInitializer(1.)
        w = get_parameter_or_create(
            "W", (outmaps, inp.shape[base_axis] / group) + tuple(kernel),
            w_init, not fix_parameters)
        w *= std
    elif w_init is None and not use_wscale:
        w_init = NormalInitializer(std)
        w = get_parameter_or_create(
            "W", (outmaps, inp.shape[base_axis] / group) + tuple(kernel),
            w_init, not fix_parameters)
    else:
        if w_init is None:
            w_init = UniformInitializer(calc_uniform_lim_glorot(
                inp.shape[base_axis], outmaps, tuple(kernel)),
                                        rng=rng)
        w = get_parameter_or_create(
            "W", (outmaps, inp.shape[base_axis] / group) + tuple(kernel),
            w_init, not fix_parameters)

    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    b = None
    if with_bias:
        b = get_parameter_or_create("b", (outmaps, ), b_init,
                                    not fix_parameters)

    return F.convolution(inp, w, b, base_axis, pad, stride, dilation, group)
Example #15
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel,
                 pad=None,
                 stride=None,
                 dilation=None,
                 group=1,
                 w_init=None,
                 b_init=None,
                 base_axis=1,
                 fix_parameters=False,
                 rng=None,
                 with_bias=True,
                 channel_last=False,
                 name=''):

        Module.__init__(self, name=name)
        self._scope_name = f'<convolution at {hex(id(self))}>'

        if w_init is None:
            w_init = UniformInitializer(calc_uniform_lim_glorot(
                in_channels, out_channels, tuple(kernel)),
                                        rng=rng)

        w_shape = (out_channels, in_channels // group) + tuple(kernel)
        b_shape = (out_channels, )

        self._b = None
        if with_bias and b_init is None:
            b_init = ConstantInitializer()

        if fix_parameters:
            self._W = nn.Variable.from_numpy_array(w_init(w_shape))
            if with_bias:
                self._b = nn.Variable.from_numpy_array(b_init(b_shape))
        else:
            self._W = Parameter(w_shape,
                                initializer=w_init,
                                scope=self._scope_name)
            if with_bias:
                self._b = Parameter(b_shape,
                                    initializer=b_init,
                                    scope=self._scope_name)

        self._base_axis = base_axis
        self._pad = pad
        self._stride = stride
        self._dilation = dilation
        self._group = group
        self._kernel = kernel
        self._in_channels = in_channels
        self._out_channels = out_channels
        self._channel_last = channel_last
        self._fix_parameters = fix_parameters
        self._rng = rng
Example #16
def _get_generator(proto):
    if proto.type == 'Normal':
        return NormalInitializer(sigma=proto.multiplier)
    elif proto.type == 'Uniform':
        return UniformInitializer(lim=(-proto.multiplier, proto.multiplier))
    elif proto.type == 'Constant':
        return ConstantInitializer(value=proto.multiplier)
    else:
        raise ValueError('Generator type "' +
                         proto.type + '" is not supported.')
Example #17
def convolution(inp,
                outmaps,
                kernel,
                pad=None,
                stride=None,
                dilation=None,
                group=1,
                w_init=None,
                b_init=None,
                base_axis=1,
                fix_parameters=False,
                rng=None,
                with_bias=True):
    """
    N-D Convolution with a bias term.

    For Dilated Convolution (a.k.a. Atrous Convolusion), refer to:

    - Chen et al., DeepLab: Semantic Image Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs. https://arxiv.org/abs/1606.00915

    - Yu et al., Multi-Scale Context Aggregation by Dilated Convolutions. https://arxiv.org/abs/1511.07122

    Args:
        inp (~nnabla.Variable): N-D array.
        outmaps (int): Number of convolution kernels (which is equal to the number of output channels). For example, to apply convolution on an input with 16 types of filters, specify 16.
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel size. For example, to apply convolution on an image with a 3 (height) by 5 (width) two-dimensional kernel, specify (3,5).
        pad (:obj:`tuple` of :obj:`int`): Padding sizes for dimensions.
        stride (:obj:`tuple` of :obj:`int`): Stride sizes for dimensions.
        dilation (:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions.
        group (int): Number of groups of channels. This makes connections across channels more sparse by grouping connections along map direction.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        fix_parameters (bool): When set to `True`, the weights and biases will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`: N-D array.

    """
    if w_init is None:
        w_init = UniformInitializer(calc_uniform_lim_glorot(
            inp.shape[base_axis], outmaps, tuple(kernel)),
                                    rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create("W", (outmaps, inp.shape[base_axis] // group) +
                                tuple(kernel), w_init, not fix_parameters)
    b = None
    if with_bias:
        b = get_parameter_or_create("b", (outmaps, ), b_init,
                                    not fix_parameters)
    return F.convolution(inp, w, b, base_axis, pad, stride, dilation, group)
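The default branch above amounts to a Glorot-uniform weight initialization; a short sketch of doing the same explicitly through the public PF.convolution API (shapes and the seed are assumptions):

import numpy as np
import nnabla as nn
import nnabla.parametric_functions as PF
from nnabla.initializer import UniformInitializer, calc_uniform_lim_glorot

nn.clear_parameters()
rng = np.random.RandomState(313)
x = nn.Variable((8, 3, 32, 32))
# Reproduce the default Glorot-uniform bound explicitly.
w_init = UniformInitializer(
    calc_uniform_lim_glorot(x.shape[1], 16, kernel=(3, 3)), rng=rng)
with nn.parameter_scope("conv_example"):
    y = PF.convolution(x, 16, kernel=(3, 3), pad=(1, 1), w_init=w_init)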
Example #18
    def call(self, inputs):
        r"""Encoder layer.
        Args:
            inputs (nn.Variable): An input variable of shape (B, T) indicates indices
                of character embeddings.

        Returns:
            nn.Variable: Output variable of shape (T, B, C).
        """
        hp = self._hparams
        with nn.parameter_scope('embeddings'):
            val = np.sqrt(6.0 / (len(hp.vocab) + hp.symbols_embedding_dim))
            inputs = PF.embed(
                inputs,
                n_inputs=len(hp.vocab),
                n_features=hp.symbols_embedding_dim,
                initializer=UniformInitializer(lim=(-val,
                                                    val)))  # (B, T, C=512)

        with nn.parameter_scope('ngrams'):
            out = inputs
            for i in range(hp.encoder_n_convolutions):
                with nn.parameter_scope(f'filter_{i}'):
                    out = conv_norm(out,
                                    out_channels=hp.encoder_embedding_dim,
                                    kernel_size=hp.encoder_kernel_size,
                                    padding=(hp.encoder_kernel_size - 1) // 2,
                                    bias=False,
                                    stride=1,
                                    dilation=1,
                                    w_init_gain='relu',
                                    scope='conv_norm',
                                    channel_last=True)  # (B, C=512, T)
                    out = PF.batch_normalization(out,
                                                 batch_stat=self.training,
                                                 axes=[2])
                    out = F.relu(out)
                    if self.training:
                        # (B, C=512, T) --> (B, T, C=512)
                        out = F.dropout(out, 0.5)

        with nn.parameter_scope('lstm_encoder'):
            out = F.transpose(out, (1, 0, 2))  # (2, 0, 1))
            h = F.constant(shape=(2, 2, hp.batch_size,
                                  hp.encoder_embedding_dim // 2))
            c = F.constant(shape=(2, 2, hp.batch_size,
                                  hp.encoder_embedding_dim // 2))
            out, _, _ = PF.lstm(out,
                                h,
                                c,
                                training=self.training,
                                bidirectional=True)

        return out  # (T, B, C=512)
Example #19
def affine(inp,
           n_outmaps,
           base_axis=1,
           w_init=None,
           b_init=None,
           fix_parameters=False,
           rng=None,
           with_bias=True,
           use_wscale=True,
           use_he_backward=False):
    """
    """
    if not hasattr(n_outmaps, '__iter__'):
        n_outmaps = [n_outmaps]
    n_outmaps = list(n_outmaps)
    n_outmap = int(np.prod(n_outmaps))

    # Use He backward
    if use_he_backward:
        std = calc_normal_std_he_backward(inp.shape[base_axis], n_outmap)
    else:
        std = calc_normal_std_he_forward(inp.shape[base_axis], n_outmap)

    # W init
    if w_init is None and use_wscale:
        # Equalized Learning Rate
        w_init = NormalInitializer(1.)
        w = get_parameter_or_create(
            "W", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps, w_init,
            not fix_parameters)
        w *= std
    elif w_init is None and not use_wscale:
        w_init = NormalInitializer(std)
        w = get_parameter_or_create(
            "W", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps, w_init,
            not fix_parameters)
    else:
        if w_init is None:
            w_init = UniformInitializer(calc_uniform_lim_glorot(
                inp.shape[base_axis], n_outmap),
                                        rng=rng)
        w = get_parameter_or_create(
            "W", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps, w_init,
            not fix_parameters)

    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    b = None
    if with_bias:
        b = get_parameter_or_create("b", n_outmaps, b_init, not fix_parameters)

    return F.affine(inp, w, b, base_axis)
Example #20
def test_param_name(p_name):
    import nnabla as nn
    rng = np.random.RandomState(seed=313)
    shape = (8, 8, 3, 3)

    pe_name = p_name.split('/')[-1]
    nn.clear_parameters()
    initializer = UniformInitializer(lim=(-1, 1), rng=rng)
    param1 = nn.parameter.get_parameter_or_create(p_name,
                                                  shape,
                                                  initializer=initializer,
                                                  need_grad=True)
    assert param1.name == pe_name
Example #21
def noisy_layer(x, out_size, name):
    inpt_size = x.shape[1]
    root_p = np.sqrt(inpt_size)
    mu_init = UniformInitializer((-1.0 / root_p, 1.0 / root_p))
    sig_init = ConstantInitializer(0.5 / root_p)
    eps_w, eps_b = sample_noise(inpt_size, out_size)
    with nn.parameter_scope(name):
        mu_w = get_parameter_or_create('mu_w', (inpt_size, out_size), mu_init)
        sig_w = get_parameter_or_create('sig_w', (inpt_size, out_size),
                                        sig_init)
        mu_b = get_parameter_or_create('mu_b', (out_size, ), mu_init)
        sig_b = get_parameter_or_create('sig_b', (out_size, ), sig_init)
    return F.affine(x, mu_w + sig_w * eps_w, mu_b + sig_b * eps_b)
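sample_noise is not shown in this example; one plausible (hypothetical) implementation uses factorized Gaussian noise in the style of NoisyNet layers, returning Variables shaped to match mu_w and mu_b above.

import numpy as np
import nnabla as nn

def sample_noise(inpt_size, out_size):
    # Hypothetical helper: factorized noise with f(x) = sign(x) * sqrt(|x|).
    def f(x):
        return np.sign(x) * np.sqrt(np.abs(x))
    eps_in = f(np.random.randn(inpt_size, 1))
    eps_out = f(np.random.randn(1, out_size))
    eps_w = nn.Variable.from_numpy_array(eps_in * eps_out)      # (inpt_size, out_size)
    eps_b = nn.Variable.from_numpy_array(eps_out.reshape(-1))   # (out_size,)
    return eps_w, eps_b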
Example #22
def embed(inp, n_inputs, n_features):
    """ Embed.

    Embed slices a matrix/tensor with indexing array/tensor

    Args:
        inp (~nnabla.Variable): [Integer] Indices with shape :math:`(I_0, ..., I_N)`
        n_inputs : number of possible inputs, words or vocabularies
        n_features : number of embedding features
    Returns:
        ~nnabla.Variable: Output with shape :math:`(I_0, ..., I_N, W_1, ..., W_M)`
    """
    w = get_parameter_or_create("W", [n_inputs, n_features],
                                UniformInitializer((-np.sqrt(3.), np.sqrt(3))), True)
    return F.embed(inp, w)
Example #23
def deconvolution(inp,
                  outmaps,
                  kernel,
                  pad=None,
                  stride=None,
                  dilation=None,
                  group=1,
                  w_init=None,
                  b_init=None,
                  base_axis=1,
                  fix_parameters=False,
                  rng=None,
                  with_bias=True):
    """
    Deconvolution layer.

    Args:
        inp (~nnabla.Variable): N-D array.
        outmaps (int): Number of deconvolution kernels (which is equal to the number of output channels). For example, to apply deconvolution on an input with 16 types of filters, specify 16.
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel size. For example, to apply deconvolution on an image with a 3 (height) by 5 (width) two-dimensional kernel, specify (3,5).
        pad (:obj:`tuple` of :obj:`int`): Padding sizes for dimensions.
        stride (:obj:`tuple` of :obj:`int`): Stride sizes for dimensions.
        dilation (:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions.
        group (int): Number of groups of channels. This makes connections across channels sparser by grouping connections along map direction.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        fix_parameters (bool): When set to `True`, the weights and biases will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`: N-D array.

    """
    if w_init is None:
        w_init = UniformInitializer(calc_uniform_lim_glorot(
            outmaps, inp.shape[base_axis], tuple(kernel)),
                                    rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create("W", (inp.shape[base_axis], outmaps // group) +
                                tuple(kernel), w_init, not fix_parameters)
    b = None
    if with_bias:
        b = get_parameter_or_create("b", (outmaps, ), b_init,
                                    not fix_parameters)
    return F.deconvolution(inp, w, b, base_axis, pad, stride, dilation, group)
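A brief sketch of the same pattern via PF.deconvolution with an explicit UniformInitializer; note the swapped in/out map order in the Glorot bound, as in the default above (shapes are assumptions).

import nnabla as nn
import nnabla.parametric_functions as PF
from nnabla.initializer import UniformInitializer, calc_uniform_lim_glorot

nn.clear_parameters()
x = nn.Variable((4, 16, 8, 8))
w_init = UniformInitializer(calc_uniform_lim_glorot(8, x.shape[1], kernel=(2, 2)))
with nn.parameter_scope("deconv_example"):
    y = PF.deconvolution(x, 8, kernel=(2, 2), stride=(2, 2), w_init=w_init)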
Example #24
def affine(inp,
           n_outmaps,
           base_axis=1,
           w_init=None,
           b_init=None,
           fix_parameters=False,
           rng=None,
           with_bias=True):
    """
    The affine layer, also known as the fully connected layer. Computes

    .. math::
        {\\mathbf y} = {\\mathbf A} {\\mathbf x} + {\\mathbf b}.

    where :math:`{\\mathbf x}, {\\mathbf y}` are the inputs and outputs respectively,
    and :math:`{\\mathbf A}, {\\mathbf b}` are constants.

    Args:
        inp (~nnabla.Variable): Input N-D array with shape (:math:`M_0 \\times \ldots \\times M_{B-1} \\times D_B \\times \ldots \\times D_N`). Dimensions before and after base_axis are flattened as if it is a matrix.
        n_outmaps (:obj:`int` or :obj:`tuple` of :obj:`int`): Number of output neurons per data.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        fix_parameters (bool): When set to `True`, the weights and biases will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`: :math:`(B + 1)`-D array. (:math:`M_0 \\times \\ldots \\times M_{B-1} \\times L`)

    """
    if not hasattr(n_outmaps, '__iter__'):
        n_outmaps = [n_outmaps]
    n_outmaps = list(n_outmaps)
    n_outmap = int(np.prod(n_outmaps))
    if w_init is None:
        inmaps = np.prod(inp.shape[base_axis:])
        w_init = UniformInitializer(calc_uniform_lim_glorot(inmaps, n_outmap),
                                    rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create("W", [int(np.prod(inp.shape[base_axis:]))] +
                                n_outmaps, w_init, not fix_parameters)
    b = None
    if with_bias:
        b = get_parameter_or_create("b", n_outmaps, b_init, not fix_parameters)
    return F.affine(inp, w, b, base_axis)
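A compact sketch of the default Glorot-uniform bound through the public PF.affine API; the batch and feature sizes are assumptions.

import numpy as np
import nnabla as nn
import nnabla.parametric_functions as PF
from nnabla.initializer import UniformInitializer, calc_uniform_lim_glorot

nn.clear_parameters()
x = nn.Variable((32, 128))
# Same bound the default branch computes: inmaps is the product of non-sample dims.
lim = calc_uniform_lim_glorot(int(np.prod(x.shape[1:])), 10)
with nn.parameter_scope("fc_example"):
    y = PF.affine(x, 10, w_init=UniformInitializer(lim))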
Example #25
def discriminator(x,
                  y,
                  scopename="discriminator",
                  maps=64,
                  n_classes=1000,
                  s=4,
                  test=False,
                  sn=True):
    with nn.parameter_scope(scopename):
        # Resblocks
        h = optblock_d(x, y, "block-1", n_classes, maps * 1, test=test, sn=sn)
        h = resblock_d(h, y, "block-2", n_classes, maps * 2, test=test, sn=sn)
        h = attnblock(h, sn=sn, test=test)
        h = resblock_d(h, y, "block-3", n_classes, maps * 4, test=test, sn=sn)
        h = resblock_d(h, y, "block-4", n_classes, maps * 8, test=test, sn=sn)
        h = resblock_d(h, y, "block-5", n_classes, maps * 16, test=test, sn=sn)
        h = resblock_d(h,
                       y,
                       "block-6",
                       n_classes,
                       maps * 16,
                       downsample=False,
                       test=test,
                       sn=sn)
        # Last affine
        #h = F.leaky_relu(h, 0.2)
        h = F.relu(h)
        h = F.sum(h, axis=(2, 3))
        o0 = affine(h, 1, sn=sn, test=test)
        # Project discriminator
        l, u = calc_uniform_lim_glorot(n_classes, maps * 16)
        e = embed(y,
                  n_classes,
                  maps * 16,
                  initializer=UniformInitializer((l, u)),
                  name="projection",
                  sn=sn,
                  test=test)
        o1 = F.sum(h * e, axis=1, keepdims=True)
    return o0 + o1
Example #26
def masked_convolution(inp,
                       outmaps,
                       kernel,
                       pad=None,
                       stride=None,
                       dilation=None,
                       group=1,
                       w_init=None,
                       b_init=None,
                       base_axis=1,
                       fix_parameters=False,
                       rng=None,
                       with_bias=True):
    """
    """
    if w_init is None:
        w_init = UniformInitializer(calc_uniform_lim_glorot(
            inp.shape[base_axis], outmaps, tuple(kernel)),
                                    rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create("W", (outmaps, inp.shape[base_axis] // group) +
                                tuple(kernel), w_init, not fix_parameters)
    mask_w = get_parameter_or_create("Mw", w.shape, ConstantInitializer(0.),
                                     False)
    w_masked = w * mask_w
    b = None
    b_masked = None
    if with_bias:
        b = get_parameter_or_create("b", (outmaps, ), b_init,
                                    not fix_parameters)
        mask_b = get_parameter_or_create("Mb", b.shape,
                                         ConstantInitializer(0.), False)
        b_masked = b * mask_b

    return F.convolution(inp, w_masked, b_masked, base_axis, pad, stride,
                         dilation, group)
Example #27
def conv_norm(inputs, out_channels, kernel_size, stride, padding, dilation,
              bias, w_init_gain, scope, **kargs):
    r"""1D convolutional layer.

    Args:
        inputs (nn.Variable): An input variable of shape (B, C, T).
        out_channels (int): The number of ouput channels.
        kernel_size (int): The kernel size.
        stride (int): The stride.
        padding (int): The number of paddings.
        dilation (int): The dilation.
        bias (bool): Whether bias is used.
        w_init_gain (str): The non-linear function.
        scope (str): The parameter scope name.

    Returns:
        nn.Variable: An output variable.
    """
    with nn.parameter_scope(scope):
        base_axis = (len(inputs.shape) - 1
                     if kargs.get('channel_last', False) else 1)
        lim = xavier_uniform_bound(inputs.shape,
                                   out_channels, (kernel_size, ),
                                   base_axis,
                                   nonlinearity=w_init_gain,
                                   is_affine=False)
        w_init = UniformInitializer(lim)
        out = PF.convolution(inputs,
                             out_channels,
                             kernel=(kernel_size, ),
                             stride=(stride, ),
                             pad=(padding, ),
                             w_init=w_init,
                             dilation=(dilation, ),
                             with_bias=bias,
                             **kargs)
    return out
Example #28
def cifar10_resnet23_prediction(image, ctx, test=False):
    """
    Construct ResNet 23
    """

    # Residual Unit
    def res_unit(x, scope_name, rng, dn=False, test=False):
        C = x.shape[1]
        with nn.parameter_scope(scope_name):

            # Conv -> BN -> Relu
            with nn.parameter_scope("conv1"):
                w_init = UniformInitializer(calc_uniform_lim_glorot(
                    C, C // 2, kernel=(1, 1)),
                                            rng=rng)
                h = PF.convolution(x,
                                   C // 2,
                                   kernel=(1, 1),
                                   pad=(0, 0),
                                   w_init=w_init,
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN -> Relu
            with nn.parameter_scope("conv2"):
                w_init = UniformInitializer(calc_uniform_lim_glorot(
                    C // 2, C // 2, kernel=(3, 3)),
                                            rng=rng)
                h = PF.convolution(h,
                                   C // 2,
                                   kernel=(3, 3),
                                   pad=(1, 1),
                                   w_init=w_init,
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN
            with nn.parameter_scope("conv3"):
                w_init = UniformInitializer(calc_uniform_lim_glorot(
                    C // 2, C, kernel=(1, 1)),
                                            rng=rng)
                h = PF.convolution(h,
                                   C,
                                   kernel=(1, 1),
                                   pad=(0, 0),
                                   w_init=w_init,
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
            # Residual -> Relu
            h = F.relu(h + x)

            # Maxpooling
            if dn:
                h = F.max_pooling(h, kernel=(2, 2), stride=(2, 2))

            return h

    # Random generator for using the same init parameters in all devices
    rng = np.random.RandomState(0)
    nmaps = 64
    ncls = 10

    # Conv -> BN -> Relu
    with nn.context_scope(ctx):
        with nn.parameter_scope("conv1"):
            # Preprocess
            if not test:

                image = F.image_augmentation(image,
                                             contrast=1.0,
                                             angle=0.25,
                                             flip_lr=True)
                image.need_grad = False

            w_init = UniformInitializer(calc_uniform_lim_glorot(3,
                                                                nmaps,
                                                                kernel=(3, 3)),
                                        rng=rng)
            h = PF.convolution(image,
                               nmaps,
                               kernel=(3, 3),
                               pad=(1, 1),
                               w_init=w_init,
                               with_bias=False)
            h = PF.batch_normalization(h, batch_stat=not test)
            h = F.relu(h)

        h = res_unit(h, "conv2", rng, False)  # -> 32x32
        h = res_unit(h, "conv3", rng, True)  # -> 16x16
        h = res_unit(h, "conv4", rng, False)  # -> 16x16
        h = res_unit(h, "conv5", rng, True)  # -> 8x8
        h = res_unit(h, "conv6", rng, False)  # -> 8x8
        h = res_unit(h, "conv7", rng, True)  # -> 4x4
        h = res_unit(h, "conv8", rng, False)  # -> 4x4
        h = F.average_pooling(h, kernel=(4, 4))  # -> 1x1

        w_init = UniformInitializer(calc_uniform_lim_glorot(int(
            np.prod(h.shape[1:])),
                                                            ncls,
                                                            kernel=(1, 1)),
                                    rng=rng)
        pred = PF.affine(h, ncls, w_init=w_init)

    return pred
Example #29
def binary_connect_convolution(inp,
                               outmaps,
                               kernel,
                               pad=None,
                               stride=None,
                               dilation=None,
                               group=1,
                               w_init=None,
                               wb_init=None,
                               b_init=None,
                               base_axis=1,
                               fix_parameters=False,
                               rng=None,
                               with_bias=True):
    """Binary Connect Convolution, multiplier-less inner-product.

    Binary Connect Convolution is the convolution function, 
    except the definition of the inner product is modified.
    The input-output relation of this function is as follows:

    .. math::

        y_{n, a, b} = \sum_{m} \sum_{i} \sum_{j} sign(w_{n, m, i, j}) x_{m, a + i, b + j}.

    Therefore :math:`sign(w_i)` is either :math:`1` or :math:`-1` and the inner product
    simplifies to addition.

    This function should be used together with BatchNormalization.

    References:

        M. Courbariaux, Y. Bengio, and J.-P. David. "BinaryConnect:
        Training Deep Neural Networks with binary weights during propagations."
        Advances in Neural Information Processing Systems. 2015.

    .. note::

        1) if you would like to share weights between some layers, please
        make sure to share the standard, floating value weights (`weight`)
        and not the binarized weights (`binary_weight`)

        2) The weights and the binary weights become synced only after :func:`~nnabla._variable.Variable.forward` is called,
        and not after a call to :func:`~nnabla._variable.Variable.backward`.
        To access the parameters of the network, remember to call :func:`~nnabla._variable.Variable.forward` once before doing so, otherwise the
        float weights and the binary weights will not be in sync.

        3) CPU and GPU implementations now use float value for `binary_weight`,
        since this function is only for simulation purposes.

    Args:
        inp (~nnabla.Variable): N-D array.
        outmaps (int): Number of convolution kernels (which is equal to the number of output channels). For example, to apply convolution on an input with 16 types of filters, specify 16.
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel size. For example, to apply convolution on an image with a 3 (height) by 5 (width) two-dimensional kernel, specify (3,5).
        pad (:obj:`tuple` of :obj:`int`): Padding sizes for dimensions.
        stride (:obj:`tuple` of :obj:`int`): Stride sizes for dimensions.
        dilation (:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions.
        group (int): Number of groups of channels. This makes connections across channels sparser by grouping connections along map direction.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        wb_init (~nnabla.initializer.BaseInitializer): Initializer for binary weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        fix_parameters (bool): When set to `True`, the weights and biases will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`

    """
    if w_init is None:
        w_init = UniformInitializer(calc_uniform_lim_glorot(
            inp.shape[base_axis], outmaps, tuple(kernel)),
                                    rng=rng)
    if wb_init is None:
        wb_init = UniformInitializer(calc_uniform_lim_glorot(
            inp.shape[base_axis], outmaps, tuple(kernel)),
                                     rng=rng)
    if b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create("W", (outmaps, inp.shape[base_axis]) +
                                tuple(kernel), w_init, not fix_parameters)
    wb = get_parameter_or_create("Wb", (outmaps, inp.shape[base_axis]) +
                                 tuple(kernel), w_init, not fix_parameters)
    b = None
    if with_bias:
        b = get_parameter_or_create("b", (outmaps, ), b_init,
                                    not fix_parameters)
    return F.binary_connect_convolution(inp, w, wb, b, base_axis, pad, stride,
                                        dilation, group)
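A hedged usage sketch through nnabla's built-in PF.binary_connect_convolution; per note 2) above, forward() must be called before the float and binary weights are in sync (shapes are assumptions).

import nnabla as nn
import nnabla.functions as F
import nnabla.parametric_functions as PF

nn.clear_parameters()
x = nn.Variable((4, 3, 32, 32))
with nn.parameter_scope("bc_conv1"):
    h = PF.binary_connect_convolution(x, 16, kernel=(3, 3), pad=(1, 1))
    h = PF.batch_normalization(h, batch_stat=True)  # recommended companion layer
    h = F.relu(h)
h.forward()  # syncs the float weights "W" with the binary weights "Wb"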
Example #30
def binary_weight_convolution(inp,
                              outmaps,
                              kernel,
                              pad=None,
                              stride=None,
                              dilation=None,
                              group=1,
                              w_init=None,
                              wb_init=None,
                              b_init=None,
                              base_axis=1,
                              fix_parameters=False,
                              rng=None,
                              with_bias=True):
    """Binary Weight Convolution, multiplier-less inner-product with a scale factor.

    Binary Weight Convolution is the convolution function, but the
    inner product in this function is the following,

    .. math::

        y_{n, a, b} = \\frac{1}{\\|\\mathbf{w}_n\\|_{\\ell_1}} \sum_{m} \sum_{i} \sum_{j} sign(w_{n, m, i, j}) x_{m, a + i, b + j}.


    Therefore :math:`sign(w_{n, m, i, j})`  is either :math:`1` or :math:`-1` and the inner product
    simplifies to addition followed by scaling factor :math:`\\alpha = \\frac{1}{\\|\\mathbf{w}_n\\|_{\\ell_1}}`.
    The number of :math:`n` is the number of outmaps of the convolution
    function.

    References:

        Rastegari, Mohammad, et al. "XNOR-Net: ImageNet Classification Using
        Binary Convolutional Neural Networks." arXiv preprint
        arXiv:1603.05279 (2016).

    .. note::

        1) if you would like to share weights between some layers, please
        make sure to share the standard, floating value weights (`weight`)
        and not the binarized weights (`binary_weight`)

        2) The weights and the binary weights become synced only after :func:`~nnabla._variable.Variable.forward` is called,
        and not after a call to :func:`~nnabla._variable.Variable.backward`.
        To access the parameters of the network, remember to call :func:`~nnabla._variable.Variable.forward` once before doing so, otherwise the
        float weights and the binary weights will not be in sync.

        3) CPU and GPU implementations now use float value for `binary_weight`,
        since this function is only for simulation purposes.

    Args:
        inp (~nnabla.Variable): N-D array.
        outmaps (int): Number of convolution kernels (which is equal to the number of output channels). For example, to apply convolution on an input with 16 types of filters, specify 16.
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel size. For example, to apply convolution on an image with a 3 (height) by 5 (width) two-dimensional kernel, specify (3,5).
        pad (:obj:`tuple` of :obj:`int`): Padding sizes for dimensions.
        stride (:obj:`tuple` of :obj:`int`): Stride sizes for dimensions.
        dilation (:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions.
        group (int): Number of groups of channels. This makes connections across channels sparser by grouping connections along map direction.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        wb_init (~nnabla.initializer.BaseInitializer): Initializer for binary weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        fix_parameters (bool): When set to `True`, the weights and biases will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`

    """
    if w_init is None:
        w_init = UniformInitializer(calc_uniform_lim_glorot(
            inp.shape[base_axis], outmaps, tuple(kernel)),
                                    rng=rng)
    if wb_init is None:
        wb_init = UniformInitializer(calc_uniform_lim_glorot(
            inp.shape[base_axis], outmaps, tuple(kernel)),
                                     rng=rng)
    if b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create("W", (outmaps, inp.shape[base_axis]) +
                                tuple(kernel), w_init, not fix_parameters)
    wb = get_parameter_or_create("Wb", (outmaps, inp.shape[base_axis]) +
                                 tuple(kernel), w_init, not fix_parameters)
    alpha = get_parameter_or_create("alpha", (outmaps, ),
                                    ConstantInitializer(0), False)
    b = None
    if with_bias:
        b = get_parameter_or_create("b", (outmaps, ), b_init,
                                    not fix_parameters)
    return F.binary_weight_convolution(inp, w, wb, alpha, b, base_axis, pad,
                                       stride, dilation, group)
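And a matching sketch for PF.binary_weight_convolution, which additionally maintains the per-outmap scale "alpha" described above (shapes are assumptions).

import nnabla as nn
import nnabla.parametric_functions as PF

nn.clear_parameters()
x = nn.Variable((4, 3, 32, 32))
with nn.parameter_scope("bw_conv1"):
    # Creates "W", "Wb", the scaling factor "alpha", and the bias "b".
    h = PF.binary_weight_convolution(x, 16, kernel=(3, 3), pad=(1, 1))
h.forward()  # binarization and the alpha scaling happen during forward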