Code Example #1
File: load.py  Project: zwsong/nnabla
def _create_variable(v, name, shape):
    # Create and initialize variables
    class Variable:
        pass

    parameter = v.type == "Parameter"
    variable_instance = None
    if parameter:
        if v.initializer.type == 'Normal':
            initializer = NormalInitializer(v.initializer.multiplier)
        elif v.initializer.type == 'NormalAffineHe' or v.initializer.type == 'NormalAffineHeForward':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_forward(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalAffineHeBackward':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_backward(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalAffineGlorot':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_glorot(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalConvolutionHe' or v.initializer.type == 'NormalConvolutionHeForward':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_forward(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalConvolutionHeBackward':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_backward(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalConvolutionGlorot':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_glorot(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'Uniform':
            initializer = UniformInitializer(
                lim=[-v.initializer.multiplier, v.initializer.multiplier])
        elif v.initializer.type == 'UniformAffineGlorot':
            initializer = (lambda shape: UniformInitializer(calc_uniform_lim_glorot(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'UniformConvolutionGlorot':
            initializer = (lambda shape: UniformInitializer(calc_uniform_lim_glorot(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'Constant':
            initializer = ConstantInitializer(value=v.initializer.multiplier)
        else:
            initializer = None
        variable_instance = get_parameter_or_create(name, shape, initializer)
    else:
        # create empty variable, memory will be allocated in network.setup()
        # after network optimization
        variable_instance = nn.Variable()

    variable = Variable()
    variable.name = name
    variable.parameter = parameter
    variable.shape = shape
    variable.variable_instance = variable_instance

    return variable
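The initializer branches above delegate to helper functions from nnabla.initializer (calc_normal_std_he_forward, calc_normal_std_he_backward, calc_normal_std_glorot, calc_uniform_lim_glorot). As a rough orientation, the Glorot uniform limit they compute corresponds to the standard Xavier/Glorot recipe; the helper below is a hypothetical re-implementation for illustration only, not nnabla's own code:

import numpy as np

def glorot_uniform_limit(inmaps, outmaps, kernel=(1, 1)):
    # Standard Glorot/Xavier bound: sqrt(6 / (fan_in + fan_out)),
    # where fan_in and fan_out are scaled by the kernel size.
    d = np.sqrt(6.0 / (np.prod(kernel) * (inmaps + outmaps)))
    return -d, d

lo, hi = glorot_uniform_limit(16, 32, kernel=(3, 3))   # e.g. a 3x3 conv, 16 -> 32 maps
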
Code Example #2
    def res_unit(x, scope_name, rng, dn=False, test=False):
        C = x.shape[1]
        with nn.parameter_scope(scope_name):

            # Conv -> BN -> Relu
            with nn.parameter_scope("conv1"):
                w_init = UniformInitializer(calc_uniform_lim_glorot(
                    C, C // 2, kernel=(1, 1)),
                                            rng=rng)
                h = PF.convolution(x,
                                   C // 2,
                                   kernel=(1, 1),
                                   pad=(0, 0),
                                   w_init=w_init,
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN -> Relu
            with nn.parameter_scope("conv2"):
                w_init = UniformInitializer(calc_uniform_lim_glorot(
                    C // 2, C // 2, kernel=(3, 3)),
                                            rng=rng)
                h = PF.convolution(h,
                                   C // 2,
                                   kernel=(3, 3),
                                   pad=(1, 1),
                                   w_init=w_init,
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN
            with nn.parameter_scope("conv3"):
                w_init = UniformInitializer(calc_uniform_lim_glorot(
                    C // 2, C, kernel=(1, 1)),
                                            rng=rng)
                h = PF.convolution(h,
                                   C,
                                   kernel=(1, 1),
                                   pad=(0, 0),
                                   w_init=w_init,
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
            # Residual -> Relu
            h = F.relu(h + x)

            # Maxpooling
            if dn:
                h = F.max_pooling(h, kernel=(2, 2), stride=(2, 2))

            return h
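A minimal usage sketch for a res_unit defined as above (it assumes nnabla is installed and that the names the function uses, nn, F, PF, UniformInitializer and calc_uniform_lim_glorot, are imported in the same module; shapes are illustrative):

import numpy as np
import nnabla as nn

rng = np.random.RandomState(313)
x = nn.Variable((4, 16, 32, 32))        # NCHW input, C = 16 channels
h = res_unit(x, "res1", rng, dn=True)   # bottleneck residual block + 2x2 max pooling
print(h.shape)                          # (4, 16, 16, 16)
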
Code Example #3
File: models.py  Project: sony/nnabla-examples
def convolution(inp,
                outmaps,
                kernel,
                pad=None,
                stride=None,
                dilation=None,
                group=1,
                itr=1,
                w_init=None,
                b_init=None,
                base_axis=1,
                fix_parameters=False,
                rng=None,
                with_bias=True,
                sn=True,
                test=False,
                init_scale=1.0):
    """
    Convolution layer with Glorot-uniform weight initialization scaled by
    `init_scale`, and optional spectral normalization of the weights (`sn=True`).
    """
    if w_init is None:
        l, u = calc_uniform_lim_glorot(inp.shape[base_axis], outmaps,
                                       tuple(kernel))
        l, u = init_scale * l, init_scale * u
        w_init = UniformInitializer((l, u), rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create("W", (outmaps, inp.shape[base_axis] // group) +
                                tuple(kernel), w_init, not fix_parameters)
    w_sn = spectral_normalization_for_conv(w, itr=itr, test=test) if sn else w
    b = None
    if with_bias:
        b = get_parameter_or_create("b", (outmaps, ), b_init,
                                    not fix_parameters)
    return F.convolution(inp, w_sn, b, base_axis, pad, stride, dilation, group)
Code Example #4
    def __init__(self,
                 n_inmaps,
                 n_outmaps,
                 base_axis=1,
                 w_init=None,
                 b_init=None,
                 fix_parameters=False,
                 rng=None,
                 with_bias=True):
        if not hasattr(n_outmaps, '__iter__'):
            n_outmaps = [n_outmaps]
        n_outmaps = list(n_outmaps)
        n_outmap = int(np.prod(n_outmaps))
        if w_init is None:
            w_init = UniformInitializer(calc_uniform_lim_glorot(
                n_inmaps, n_outmap),
                                        rng=rng)
        if with_bias and b_init is None:
            b_init = ConstantInitializer()
        w_shape = (n_inmaps, n_outmap)
        w = nn.Variable.from_numpy_array(
            w_init(w_shape)).apply(need_grad=not fix_parameters)
        b = None
        if with_bias:
            b_shape = (n_outmap, )
            b = nn.Variable.from_numpy_array(
                b_init(b_shape)).apply(need_grad=not fix_parameters)

        self.W = w
        self.b = b
        self.base_axis = base_axis
Code Example #5
    def __init__(self,
                 in_features,
                 out_features,
                 base_axis=1,
                 w_init=None,
                 b_init=None,
                 rng=None,
                 bias=True,
                 name=''):
        Module.__init__(self, name=name)
        self._scope_name = f'<linear at {hex(id(self))}>'

        if w_init is None:
            w_init = UniformInitializer(calc_uniform_lim_glorot(
                in_features, out_features),
                                        rng=rng)
        self._W = Parameter((in_features, out_features),
                            initializer=w_init,
                            scope=self._scope_name)
        self._b = None

        if bias:
            if b_init is None:
                b_init = ConstantInitializer()
            self._b = Parameter((out_features, ),
                                initializer=b_init,
                                scope=self._scope_name)

        self._base_axis = base_axis
        self._in_features = in_features
        self._out_features = out_features
Code Example #6
def affine(inp, n_outmaps,
           base_axis=1,
           w_init=None, b_init=None,
           itr=1,
           fix_parameters=False, rng=None, with_bias=True,
           sn=True, test=False):
    """
    Affine (fully connected) layer with optional spectral normalization of
    the weights (`sn=True`).
    """
    if not hasattr(n_outmaps, '__iter__'):
        n_outmaps = [n_outmaps]
    n_outmaps = list(n_outmaps)
    n_outmap = int(np.prod(n_outmaps))
    if w_init is None:
        inmaps = np.prod(inp.shape[base_axis:])
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(inmaps, n_outmap), rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps,
        w_init, not fix_parameters)
    w_sn = spectral_normalization_for_affine(
        w, itr=itr, test=test) if sn else w
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", n_outmaps, b_init, not fix_parameters)
    return F.affine(inp, w_sn, b, base_axis)
Code Example #7
    def __init__(self, inmaps, outmaps, kernel,
                 pad=None, stride=None, dilation=None, group=1,
                 w_init=None, b_init=None,
                 base_axis=1, fix_parameters=False, rng=None, with_bias=True):
        if w_init is None:
            w_init = UniformInitializer(
                calc_uniform_lim_glorot(inmaps, outmaps, tuple(kernel)), rng=rng)
        if with_bias and b_init is None:
            b_init = ConstantInitializer()
        w_shape = (outmaps, inmaps // group) + tuple(kernel)
        w = nn.Variable.from_numpy_array(
            w_init(w_shape)).apply(need_grad=not fix_parameters)
        b = None
        if with_bias:
            b_shape = (outmaps, )
            b = nn.Variable.from_numpy_array(
                b_init(b_shape)).apply(need_grad=not fix_parameters)

        self.W = w
        self.b = b
        self.base_axis = base_axis
        self.pad = pad
        self.stride = stride
        self.dilation = dilation
        self.group = group
Code Example #8
def inq_convolution(inp, outmaps, kernel,
                    pad=None, stride=None, dilation=None, group=1,
                    num_bits=4, inq_iterations=(), selection_algorithm='random',
                    seed=-1, w_init=None, i_init=None, b_init=None,
                    base_axis=1, fix_parameters=False, rng=None,
                    with_bias=True):
    """Incremental Network Quantization Convolution Layer

    During training, the weights are sequentially quantized to power-of-two
    values, which allows the training of a multiplierless network.

    Using `inq_iterations`, one can specify after how many forward passes
    half of the learnable weights are fixed and quantized to powers-of-two.
    After reaching the last value in `inq_iterations`, all weights are fixed.

    For more details, please refer to the reference.

    Reference:
    Zhou A, Yao A, Guo Y, Xu L, Chen Y. Incremental network quantization:
    Towards lossless CNNs with low-precision weights.
    <https://arxiv.org/abs/1702.03044>

    Args:
        inp (~nnabla.Variable): Input N-D array with shape (:math:`M_0 \\times \ldots \\times M_{B-1} \\times D_B \\times \ldots \\times D_N`). Dimensions before and after base_axis are flattened as if it was a matrix.
        outmaps (int): Number of convolution kernels (which is equal to the number of output channels).
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        num_bits (int): Number of bits per weight. Value has to be larger than 1 as one bit is already used to code the value "0"
        inq_iterations (tuple of int): Tuple of iteration numbers at which we fix half of the weights.
        selection_algorithm (str): Chooses algorithm that is used to decide which weights are fixed. ("largest_abs" ... fix weights with largest absolute value, "random" ... fix weights randomly)
        seed (int): Random seed for INQ algorithm
        w_init (~nnabla.initializer.BaseInitializer): Initializer for the weight.
        i_init (~nnabla.initializer.BaseInitializer): Initializer for the indicators (0 ... learnable, 1 ... fixed).
        b_init (~nnabla.initializer.BaseInitializer): Initializer for the bias.
        fix_parameters (bool): When set to `True`, the weight and bias will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`

    """
    if w_init is None:
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(inp.shape[base_axis], outmaps, tuple(kernel)), rng=rng)
    if i_init is None:
        i_init = ConstantInitializer()
    if b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", (outmaps, inp.shape[base_axis]) + tuple(kernel),
        w_init, not fix_parameters)
    i = get_parameter_or_create(
        "I", (outmaps, inp.shape[base_axis]) + tuple(kernel),
        i_init, False)
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", (outmaps,), b_init, not fix_parameters)
    return F.inq_convolution(inp, w, i, b, base_axis, pad, stride, dilation, group, num_bits, inq_iterations, selection_algorithm, seed)
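A minimal usage sketch, assuming this function is available as nnabla.parametric_functions.inq_convolution (the docstring above is the one shipped with nnabla); the iteration counts are illustrative:

import nnabla as nn
import nnabla.parametric_functions as PF

x = nn.Variable((8, 3, 32, 32))
with nn.parameter_scope("inq_conv1"):
    # Fix and quantize half of the remaining learnable weights after
    # 1000 and 2000 forward passes, fixing the largest-magnitude weights first.
    h = PF.inq_convolution(x, 16, (3, 3), pad=(1, 1),
                           num_bits=4, inq_iterations=(1000, 2000),
                           selection_algorithm='largest_abs')
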
Code Example #9
File: functions.py  Project: sony/nnabla-examples
def conv(inp,
         outmaps,
         kernel,
         pad=None,
         stride=None,
         dilation=None,
         group=1,
         w_init=None,
         b_init=None,
         base_axis=1,
         fix_parameters=False,
         rng=None,
         with_bias=True,
         use_wscale=True,
         use_he_backward=False):
    """
    Convolution with He-based initialization. With `use_wscale=True` the
    weights are drawn from N(0, 1) and rescaled by the He standard deviation
    at run time (equalized learning rate).
    """
    # Use He backward
    if use_he_backward:
        std = calc_normal_std_he_backward(inp.shape[base_axis],
                                          outmaps,
                                          kernel=kernel)
    else:
        std = calc_normal_std_he_forward(inp.shape[base_axis],
                                         outmaps,
                                         kernel=kernel)

    # W init
    if w_init is None and use_wscale:
        # Equalized Learning Rate
        w_init = NormalInitializer(1.)
        w = get_parameter_or_create(
            "W", (outmaps, inp.shape[base_axis] // group) + tuple(kernel),
            w_init, not fix_parameters)
        w *= std
    elif w_init is None and not use_wscale:
        w_init = NormalInitializer(std)
        w = get_parameter_or_create(
            "W", (outmaps, inp.shape[base_axis] // group) + tuple(kernel),
            w_init, not fix_parameters)
    else:
        if w_init is None:
            w_init = UniformInitializer(calc_uniform_lim_glorot(
                inp.shape[base_axis], outmaps, tuple(kernel)),
                                        rng=rng)
        w = get_parameter_or_create(
            "W", (outmaps, inp.shape[base_axis] // group) + tuple(kernel),
            w_init, not fix_parameters)

    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    b = None
    if with_bias:
        b = get_parameter_or_create("b", (outmaps, ), b_init,
                                    not fix_parameters)

    return F.convolution(inp, w, b, base_axis, pad, stride, dilation, group)
Code Example #10
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel,
                 pad=None,
                 stride=None,
                 dilation=None,
                 group=1,
                 w_init=None,
                 b_init=None,
                 base_axis=1,
                 fix_parameters=False,
                 rng=None,
                 with_bias=True,
                 channel_last=False,
                 name=''):

        Module.__init__(self, name=name)
        self._scope_name = f'<convolution at {hex(id(self))}>'

        if w_init is None:
            w_init = UniformInitializer(calc_uniform_lim_glorot(
                in_channels, out_channels, tuple(kernel)),
                                        rng=rng)

        w_shape = (out_channels, in_channels // group) + tuple(kernel)
        b_shape = (out_channels, )

        self._b = None
        if with_bias and b_init is None:
            b_init = ConstantInitializer()

        if fix_parameters:
            self._W = nn.Variable.from_numpy_array(w_init(w_shape))
            if with_bias:
                self._b = nn.Variable.from_numpy_array(b_init(b_shape))
        else:
            self._W = Parameter(w_shape,
                                initializer=w_init,
                                scope=self._scope_name)
            if with_bias:
                self._b = Parameter(b_shape,
                                    initializer=b_init,
                                    scope=self._scope_name)

        self._base_axis = base_axis
        self._pad = pad
        self._stride = stride
        self._dilation = dilation
        self._group = group
        self._kernel = kernel
        self._in_channels = in_channels
        self._out_channels = out_channels
        self._channel_last = channel_last
        self._fix_parameters = fix_parameters
        self._rng = rng
Code Example #11
def convolution(inp,
                outmaps,
                kernel,
                pad=None,
                stride=None,
                dilation=None,
                group=1,
                w_init=None,
                b_init=None,
                base_axis=1,
                fix_parameters=False,
                rng=None,
                with_bias=True):
    """
    N-D Convolution with a bias term.

    For Dilated Convolution (a.k.a. Atrous Convolution), refer to:

    - Chen et al., DeepLab: Semantic Image Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs. https://arxiv.org/abs/1606.00915

    - Yu et al., Multi-Scale Context Aggregation by Dilated Convolutions. https://arxiv.org/abs/1511.07122

    Args:
        inp (~nnabla.Variable): N-D array.
        outmaps (int): Number of convolution kernels (which is equal to the number of output channels). For example, to apply convolution on an input with 16 types of filters, specify 16.
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel size. For example, to apply convolution on an image with a 3 (height) by 5 (width) two-dimensional kernel, specify (3,5).
        pad (:obj:`tuple` of :obj:`int`): Padding sizes for dimensions.
        stride (:obj:`tuple` of :obj:`int`): Stride sizes for dimensions.
        dilation (:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions.
        group (int): Number of groups of channels. This makes connections across channels more sparse by grouping connections along map direction.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        fix_parameters (bool): When set to `True`, the weights and biases will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`: N-D array.

    """
    if w_init is None:
        w_init = UniformInitializer(calc_uniform_lim_glorot(
            inp.shape[base_axis], outmaps, tuple(kernel)),
                                    rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create("W", (outmaps, inp.shape[base_axis] // group) +
                                tuple(kernel), w_init, not fix_parameters)
    b = None
    if with_bias:
        b = get_parameter_or_create("b", (outmaps, ), b_init,
                                    not fix_parameters)
    return F.convolution(inp, w, b, base_axis, pad, stride, dilation, group)
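A minimal usage sketch for this parametric convolution (same call signature as nnabla.parametric_functions.convolution); shapes are illustrative:

import nnabla as nn
import nnabla.parametric_functions as PF

x = nn.Variable((8, 3, 32, 32))                  # NCHW input
with nn.parameter_scope("conv1"):
    h = PF.convolution(x, outmaps=16, kernel=(3, 3), pad=(1, 1))
print(h.shape)                                   # (8, 16, 32, 32)
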
Code Example #12
File: functions.py  Project: sony/nnabla-examples
def affine(inp,
           n_outmaps,
           base_axis=1,
           w_init=None,
           b_init=None,
           fix_parameters=False,
           rng=None,
           with_bias=True,
           use_wscale=True,
           use_he_backward=False):
    """
    Affine layer with He-based initialization. With `use_wscale=True` the
    weights are drawn from N(0, 1) and rescaled by the He standard deviation
    at run time (equalized learning rate).
    """
    if not hasattr(n_outmaps, '__iter__'):
        n_outmaps = [n_outmaps]
    n_outmaps = list(n_outmaps)
    n_outmap = int(np.prod(n_outmaps))

    # Use He backward
    if use_he_backward:
        std = calc_normal_std_he_backward(inp.shape[base_axis], n_outmap)
    else:
        std = calc_normal_std_he_forward(inp.shape[base_axis], n_outmap)

    # W init
    if w_init is None and use_wscale:
        # Equalized Learning Rate
        w_init = NormalInitializer(1.)
        w = get_parameter_or_create(
            "W", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps, w_init,
            not fix_parameters)
        w *= std
    elif w_init is None and not use_wscale:
        w_init = NormalInitializer(std)
        w = get_parameter_or_create(
            "W", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps, w_init,
            not fix_parameters)
    else:
        if w_init is None:
            w_init = UniformInitializer(calc_uniform_lim_glorot(
                inp.shape[base_axis], n_outmaps),
                                        rng=rng)
        w = get_parameter_or_create(
            "W", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps, w_init,
            not fix_parameters)

    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    b = None
    if with_bias:
        b = get_parameter_or_create("b", n_outmaps, b_init, not fix_parameters)

    return F.affine(inp, w, b, base_axis)
Code Example #13
    def res_unit(x, scope_name, rng, dn=False, test=False):
        C = x.shape[1]
        with nn.parameter_scope(scope_name):

            # Conv -> BN -> Relu
            with nn.parameter_scope("conv1"):
                w_init = UniformInitializer(
                    calc_uniform_lim_glorot(C, C // 2, kernel=(1, 1)),
                    rng=rng)
                h = PF.convolution(x, C // 2, kernel=(1, 1), pad=(0, 0),
                                   w_init=w_init, with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN -> Relu
            with nn.parameter_scope("conv2"):
                w_init = UniformInitializer(
                    calc_uniform_lim_glorot(C // 2, C // 2, kernel=(3, 3)),
                    rng=rng)
                h = PF.convolution(h, C // 2, kernel=(3, 3), pad=(1, 1),
                                   w_init=w_init, with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN
            with nn.parameter_scope("conv3"):
                w_init = UniformInitializer(
                    calc_uniform_lim_glorot(C // 2, C, kernel=(1, 1)),
                    rng=rng)
                h = PF.convolution(h, C, kernel=(1, 1), pad=(0, 0),
                                   w_init=w_init, with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
            # Residual -> Relu
            h = F.relu(h + x)

            # Maxpooling
            if dn:
                h = F.max_pooling(h, kernel=(2, 2), stride=(2, 2))

            return h
Code Example #14
def dense(x,
          output_dim,
          base_axis=1,
          w_init=None,
          b_init=I.ConstantInitializer(0),
          activation=F.tanh):
    if w_init is None:
        w_init = I.UniformInitializer(
            I.calc_uniform_lim_glorot(np.prod(x.shape[1:]), output_dim))
    return activation(
        PF.affine(x,
                  output_dim,
                  base_axis=base_axis,
                  w_init=w_init,
                  b_init=b_init))
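A short usage sketch for the dense helper above (it assumes dense is defined in the current module together with the imports it relies on: np, nn, F, PF and I = nnabla.initializer):

import nnabla as nn
import nnabla.functions as F

x = nn.Variable((32, 64))                 # batch of 32 vectors with 64 features
with nn.parameter_scope("dense1"):
    h = dense(x, 128, activation=F.relu)  # Glorot-uniform weights, zero bias
print(h.shape)                            # (32, 128)
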
Code Example #15
def deconvolution(inp,
                  outmaps,
                  kernel,
                  pad=None,
                  stride=None,
                  dilation=None,
                  group=1,
                  w_init=None,
                  b_init=None,
                  base_axis=1,
                  fix_parameters=False,
                  rng=None,
                  with_bias=True):
    """
    Deconvolution layer.

    Args:
        inp (~nnabla.Variable): N-D array.
        outmaps (int): Number of deconvolution kernels (which is equal to the number of output channels). For example, to apply deconvolution on an input with 16 types of filters, specify 16.
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel size. For example, to apply deconvolution on an image with a 3 (height) by 5 (width) two-dimensional kernel, specify (3,5).
        pad (:obj:`tuple` of :obj:`int`): Padding sizes for dimensions.
        stride (:obj:`tuple` of :obj:`int`): Stride sizes for dimensions.
        dilation (:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions.
        group (int): Number of groups of channels. This makes connections across channels sparser by grouping connections along map direction.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        fix_parameters (bool): When set to `True`, the weights and biases will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`: N-D array.

    """
    if w_init is None:
        w_init = UniformInitializer(calc_uniform_lim_glorot(
            outmaps, inp.shape[base_axis], tuple(kernel)),
                                    rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create("W", (inp.shape[base_axis], outmaps // group) +
                                tuple(kernel), w_init, not fix_parameters)
    b = None
    if with_bias:
        b = get_parameter_or_create("b", (outmaps, ), b_init,
                                    not fix_parameters)
    return F.deconvolution(inp, w, b, base_axis, pad, stride, dilation, group)
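A minimal usage sketch (same call signature as nnabla.parametric_functions.deconvolution); a stride-2 deconvolution doubles the spatial size here:

import nnabla as nn
import nnabla.parametric_functions as PF

x = nn.Variable((8, 16, 16, 16))
with nn.parameter_scope("deconv1"):
    h = PF.deconvolution(x, outmaps=8, kernel=(2, 2), stride=(2, 2))
print(h.shape)                            # (8, 8, 32, 32)
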
Code Example #16
def affine(inp,
           n_outmaps,
           base_axis=1,
           w_init=None,
           b_init=None,
           fix_parameters=False,
           rng=None,
           with_bias=True):
    """
    The affine layer, also known as the fully connected layer. Computes

    .. math::
        {\\mathbf y} = {\\mathbf A} {\\mathbf x} + {\\mathbf b}.

    where :math:`{\\mathbf x}, {\\mathbf y}` are the inputs and outputs respectively,
    and :math:`{\\mathbf A}, {\\mathbf b}` are the learnable weight matrix and bias, respectively.

    Args:
        inp (~nnabla.Variable): Input N-D array with shape (:math:`M_0 \\times \ldots \\times M_{B-1} \\times D_B \\times \ldots \\times D_N`). Dimensions before and after base_axis are flattened as if it is a matrix.
        n_outmaps (:obj:`int` or :obj:`tuple` of :obj:`int`): Number of output neurons per data.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        fix_parameters (bool): When set to `True`, the weights and biases will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`: :math:`(B + 1)`-D array. (:math:`M_0 \\times \ldots \\times M_{B-1} \\times L`)

    """
    if not hasattr(n_outmaps, '__iter__'):
        n_outmaps = [n_outmaps]
    n_outmaps = list(n_outmaps)
    n_outmap = int(np.prod(n_outmaps))
    if w_init is None:
        inmaps = np.prod(inp.shape[base_axis:])
        w_init = UniformInitializer(calc_uniform_lim_glorot(inmaps, n_outmap),
                                    rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create("W", [int(np.prod(inp.shape[base_axis:]))] +
                                n_outmaps, w_init, not fix_parameters)
    b = None
    if with_bias:
        b = get_parameter_or_create("b", n_outmaps, b_init, not fix_parameters)
    return F.affine(inp, w, b, base_axis)
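A minimal usage sketch (same call signature as nnabla.parametric_functions.affine); dimensions after base_axis are flattened, as the docstring explains:

import nnabla as nn
import nnabla.parametric_functions as PF

x = nn.Variable((32, 3, 8, 8))            # flattened to 3*8*8 = 192 inputs per sample
with nn.parameter_scope("fc1"):
    y = PF.affine(x, 100)
print(y.shape)                            # (32, 100)
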
Code Example #17
File: parametric_functions.py  Project: zwsong/nnabla
def convolution(inp, outmaps, kernel,
                pad=None, stride=None, dilation=None, group=1,
                w_init=None, b_init=None,
                base_axis=1, fix_parameters=False, rng=None, with_bias=True):
    """
    N-D Convolution with a bias term.

    For Dilated Convolution (a.k.a. Atrous Convolution), refer to:

    - Chen et al., DeepLab: Semantic Image Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs. https://arxiv.org/abs/1606.00915

    - Yu et al., Multi-Scale Context Aggregation by Dilated Convolutions. https://arxiv.org/abs/1511.07122

    Args:
        inp (~nnabla.Variable): N-D array.
        outmaps (int): Number of convolution kernels (which is equal to the number of output channels). For example, to apply convolution on an input with 16 types of filters, specify 16.
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel size. For example, to apply convolution on an image with a 3 (height) by 5 (width) two-dimensional kernel, specify (3,5).
        pad (:obj:`tuple` of :obj:`int`): Padding sizes for dimensions.
        stride (:obj:`tuple` of :obj:`int`): Stride sizes for dimensions.
        dilation (:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions.
        group (int): Number of groups of channels. This makes connections across channels more sparse by grouping connections along map direction.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        fix_parameters (bool): When set to `True`, the weights and biases will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`: N-D array.

    """
    if w_init is None:
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(inp.shape[base_axis], outmaps, tuple(kernel)), rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", (outmaps, inp.shape[base_axis] // group) + tuple(kernel),
        w_init, not fix_parameters)
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", (outmaps,), b_init, not fix_parameters)
    return F.convolution(inp, w, b, base_axis, pad, stride, dilation, group)
Code Example #18
File: parametric_functions.py  Project: zwsong/nnabla
def affine(inp, n_outmaps,
           base_axis=1,
           w_init=None, b_init=None,
           fix_parameters=False, rng=None, with_bias=True):
    """
    The affine layer, also known as the fully connected layer. Computes

    .. math::
        {\\mathbf y} = {\\mathbf A} {\\mathbf x} + {\\mathbf b}.

    where :math:`{\\mathbf x}, {\\mathbf y}` are the inputs and outputs respectively,
    and :math:`{\\mathbf A}, {\\mathbf b}` are the learnable weight matrix and bias, respectively.

    Args:
        inp (~nnabla.Variable): Input N-D array with shape (:math:`M_0 \\times \ldots \\times M_{B-1} \\times D_B \\times \ldots \\times D_N`). Dimensions before and after base_axis are flattened as if it is a matrix.
        n_outmaps (:obj:`int` or :obj:`tuple` of :obj:`int`): Number of output neurons per data.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        fix_parameters (bool): When set to `True`, the weights and biases will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`: :math:`(B + 1)`-D array. (:math:`M_0 \\times \ldots \\times M_{B-1} \\times L`)

    """
    if not hasattr(n_outmaps, '__iter__'):
        n_outmaps = [n_outmaps]
    n_outmaps = list(n_outmaps)
    n_outmap = int(np.prod(n_outmaps))
    if w_init is None:
        inmaps = np.prod(inp.shape[base_axis:])
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(inmaps, n_outmap), rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps,
        w_init, not fix_parameters)
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", n_outmaps, b_init, not fix_parameters)
    return F.affine(inp, w, b, base_axis)
Code Example #19
File: models.py  Project: sony/nnabla-examples
def discriminator(x,
                  y,
                  scopename="discriminator",
                  maps=64,
                  n_classes=1000,
                  s=4,
                  test=False,
                  sn=True):
    with nn.parameter_scope(scopename):
        # Resblocks
        h = optblock_d(x, y, "block-1", n_classes, maps * 1, test=test, sn=sn)
        h = resblock_d(h, y, "block-2", n_classes, maps * 2, test=test, sn=sn)
        h = attnblock(h, sn=sn, test=test)
        h = resblock_d(h, y, "block-3", n_classes, maps * 4, test=test, sn=sn)
        h = resblock_d(h, y, "block-4", n_classes, maps * 8, test=test, sn=sn)
        h = resblock_d(h, y, "block-5", n_classes, maps * 16, test=test, sn=sn)
        h = resblock_d(h,
                       y,
                       "block-6",
                       n_classes,
                       maps * 16,
                       downsample=False,
                       test=test,
                       sn=sn)
        # Last affine
        #h = F.leaky_relu(h, 0.2)
        h = F.relu(h)
        h = F.sum(h, axis=(2, 3))
        o0 = affine(h, 1, sn=sn, test=test)
        # Project discriminator
        l, u = calc_uniform_lim_glorot(n_classes, maps * 16)
        e = embed(y,
                  n_classes,
                  maps * 16,
                  initializer=UniformInitializer((l, u)),
                  name="projection",
                  sn=sn,
                  test=test)
        o1 = F.sum(h * e, axis=1, keepdims=True)
    return o0 + o1
Code Example #20
File: parametric_functions.py  Project: zwsong/nnabla
def deconvolution(inp, outmaps, kernel,
                  pad=None, stride=None, dilation=None, group=1,
                  w_init=None, b_init=None,
                  base_axis=1, fix_parameters=False, rng=None, with_bias=True):
    """
    Deconvolution layer.

    Args:
        inp (~nnabla.Variable): N-D array.
        outmaps (int): Number of deconvolution kernels (which is equal to the number of output channels). For example, to apply deconvolution on an input with 16 types of filters, specify 16.
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel size. For example, to apply deconvolution on an image with a 3 (height) by 5 (width) two-dimensional kernel, specify (3,5).
        pad (:obj:`tuple` of :obj:`int`): Padding sizes for dimensions.
        stride (:obj:`tuple` of :obj:`int`): Stride sizes for dimensions.
        dilation (:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions.
        group (int): Number of groups of channels. This makes connections across channels sparser by grouping connections along map direction.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        fix_parameters (bool): When set to `True`, the weights and biases will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`: N-D array.

    """
    if w_init is None:
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(outmaps, inp.shape[base_axis], tuple(kernel)), rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", (inp.shape[base_axis], outmaps // group) + tuple(kernel),
        w_init, not fix_parameters)
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", (outmaps,), b_init, not fix_parameters)
    return F.deconvolution(inp, w, b, base_axis, pad, stride, dilation, group)
Code Example #21
File: models.py  Project: sony/nnabla-examples
def masked_convolution(inp,
                       outmaps,
                       kernel,
                       pad=None,
                       stride=None,
                       dilation=None,
                       group=1,
                       w_init=None,
                       b_init=None,
                       base_axis=1,
                       fix_parameters=False,
                       rng=None,
                       with_bias=True):
    """
    Convolution whose weights (and bias) are multiplied element-wise by
    non-learnable masks ("Mw", "Mb"), e.g. for network pruning.
    """
    if w_init is None:
        w_init = UniformInitializer(calc_uniform_lim_glorot(
            inp.shape[base_axis], outmaps, tuple(kernel)),
                                    rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create("W", (outmaps, inp.shape[base_axis] // group) +
                                tuple(kernel), w_init, not fix_parameters)
    mask_w = get_parameter_or_create("Mw", w.shape, ConstantInitializer(0.),
                                     False)
    w_masked = w * mask_w
    b = None
    b_masked = None
    if with_bias:
        b = get_parameter_or_create("b", (outmaps, ), b_init,
                                    not fix_parameters)
        mask_b = get_parameter_or_create("Mb", b.shape,
                                         ConstantInitializer(0.), False)
        b_masked = b * mask_b

    return F.convolution(inp, w_masked, b_masked, base_axis, pad, stride,
                         dilation, group)
Code Example #22
def binary_connect_convolution(inp,
                               outmaps,
                               kernel,
                               pad=None,
                               stride=None,
                               dilation=None,
                               group=1,
                               w_init=None,
                               wb_init=None,
                               b_init=None,
                               base_axis=1,
                               fix_parameters=False,
                               rng=None,
                               with_bias=True):
    """Binary Connect Convolution, multiplier-less inner-product.

    Binary Connect Convolution is the convolution function, 
    except the definition of the inner product is modified.
    The input-output relation of this function is as follows:

    .. math::

        y_{n, a, b} = \sum_{m} \sum_{i} \sum_{j} sign(w_{n, m, i, j}) x_{m, a + i, b + j}.

    Therefore :math:`sign(w_i)` is either :math:`1` or :math:`-1` and the inner product
    simplifies to addition.

    This function should be used together with BatchNormalization.

    References:

        M. Courbariaux, Y. Bengio, and J.-P. David. "BinaryConnect:
        Training Deep Neural Networks with binary weights during propagations."
        Advances in Neural Information Processing Systems. 2015.

    .. note::

        1) if you would like to share weights between some layers, please
        make sure to share the standard, floating value weights (`weight`)
        and not the binarized weights (`binary_weight`)

        2) The weights and the binary weights become synced only after :func:`~nnabla._variable.Variable.forward` is called,
        and not after a call to :func:`~nnabla._variable.Variable.backward`.
        To access the parameters of the network, remember to call :func:`~nnabla._variable.Variable.forward` once before doing so, otherwise the
        float weights and the binary weights will not be in sync.

        3) CPU and GPU implementations now use float value for `binary_weight`,
        since this function is only for simulation purposes.

    Args:
        inp (~nnabla.Variable): N-D array.
        outmaps (int): Number of convolution kernels (which is equal to the number of output channels). For example, to apply convolution on an input with 16 types of filters, specify 16.
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel size. For example, to apply convolution on an image with a 3 (height) by 5 (width) two-dimensional kernel, specify (3,5).
        pad (:obj:`tuple` of :obj:`int`): Padding sizes for dimensions.
        stride (:obj:`tuple` of :obj:`int`): Stride sizes for dimensions.
        dilation (:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions.
        group (int): Number of groups of channels. This makes connections across channels sparser by grouping connections along map direction.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        wb_init (~nnabla.initializer.BaseInitializer): Initializer for binary weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        fix_parameters (bool): When set to `True`, the weights and biases will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`

    """
    if w_init is None:
        w_init = UniformInitializer(calc_uniform_lim_glorot(
            inp.shape[base_axis], outmaps, tuple(kernel)),
                                    rng=rng)
    if wb_init is None:
        wb_init = UniformInitializer(calc_uniform_lim_glorot(
            inp.shape[base_axis], outmaps, tuple(kernel)),
                                     rng=rng)
    if b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create("W", (outmaps, inp.shape[base_axis]) +
                                tuple(kernel), w_init, not fix_parameters)
    wb = get_parameter_or_create("Wb", (outmaps, inp.shape[base_axis]) +
                                 tuple(kernel), wb_init, not fix_parameters)
    b = None
    if with_bias:
        b = get_parameter_or_create("b", (outmaps, ), b_init,
                                    not fix_parameters)
    return F.binary_connect_convolution(inp, w, wb, b, base_axis, pad, stride,
                                        dilation, group)
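A minimal usage sketch, assuming this function is available as nnabla.parametric_functions.binary_connect_convolution; as the docstring recommends, it is paired with batch normalization:

import nnabla as nn
import nnabla.parametric_functions as PF

x = nn.Variable((8, 3, 32, 32))
with nn.parameter_scope("bc_conv1"):
    h = PF.binary_connect_convolution(x, 16, (3, 3), pad=(1, 1))
    h = PF.batch_normalization(h, batch_stat=True)
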
Code Example #23
File: parametric_functions.py  Project: zwsong/nnabla
def binary_connect_convolution(inp, outmaps, kernel,
                               pad=None, stride=None, dilation=None, group=1,
                               w_init=None, wb_init=None, b_init=None,
                               base_axis=1, fix_parameters=False, rng=None,
                               with_bias=True):
    """Binary Connect Convolution, multiplier-less inner-product.

    Binary Connect Convolution is the convolution function, 
    except the definition of the inner product is modified.
    The input-output relation of this function is as follows:

    .. math::

        y_{n, a, b} = \sum_{m} \sum_{i} \sum_{j} sign(w_{n, m, i, j}) x_{m, a + i, b + j}.

    Therefore :math:`sign(w_i)` is either :math:`1` or :math:`-1` and the inner product
    simplifies to addition.

    This function should be used together with BatchNormalization.

    References:

        M. Courbariaux, Y. Bengio, and J.-P. David. "BinaryConnect:
        Training Deep Neural Networks with binary weights during propagations."
        Advances in Neural Information Processing Systems. 2015.

    .. note::

        1) if you would like to share weights between some layers, please
        make sure to share the standard, floating value weights (`weight`)
        and not the binarized weights (`binary_weight`)

        2) The weights and the binary weights become synced only after :func:`~nnabla._variable.Variable.forward` is called,
        and not after a call to :func:`~nnabla._variable.Variable.backward`.
        To access the parameters of the network, remember to call :func:`~nnabla._variable.Variable.forward` once before doing so, otherwise the
        float weights and the binary weights will not be in sync.

        3) CPU and GPU implementations now use float value for `binary_weight`,
        since this function is only for simulation purposes.

    Args:
        inp (~nnabla.Variable): N-D array.
        outmaps (int): Number of convolution kernels (which is equal to the number of output channels). For example, to apply convolution on an input with 16 types of filters, specify 16.
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel size. For example, to apply convolution on an image with a 3 (height) by 5 (width) two-dimensional kernel, specify (3,5).
        pad (:obj:`tuple` of :obj:`int`): Padding sizes for dimensions.
        stride (:obj:`tuple` of :obj:`int`): Stride sizes for dimensions.
        dilation (:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions.
        group (int): Number of groups of channels. This makes connections across channels sparser by grouping connections along map direction.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        wb_init (~nnabla.initializer.BaseInitializer): Initializer for binary weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        fix_parameters (bool): When set to `True`, the weights and biases will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`

    """
    if w_init is None:
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(inp.shape[base_axis], outmaps, tuple(kernel)), rng=rng)
    if wb_init is None:
        wb_init = UniformInitializer(
            calc_uniform_lim_glorot(inp.shape[base_axis], outmaps, tuple(kernel)), rng=rng)
    if b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", (outmaps, inp.shape[base_axis]) + tuple(kernel),
        w_init, not fix_parameters)
    wb = get_parameter_or_create(
        "Wb", (outmaps, inp.shape[base_axis]) + tuple(kernel),
        wb_init, not fix_parameters)
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", (outmaps,), b_init, not fix_parameters)
    return F.binary_connect_convolution(inp, w, wb, b, base_axis, pad, stride, dilation, group)
Code Example #24
def binary_weight_affine(inp,
                         n_outmaps,
                         base_axis=1,
                         w_init=None,
                         wb_init=None,
                         b_init=None,
                         fix_parameters=False,
                         rng=None,
                         with_bias=True):
    """Binary Weight Affine, multiplier-less inner-product with a scale factor.

    Binary Weight Affine is the affine function, but the inner product
    in this function is the following,

    .. math::

        y_j = \\frac{1}{\\|\\mathbf{w}_j\\|_{\\ell_1}} \sum_{i} sign(w_{ji}) x_i

    Therefore :math:`sign(w_{ji})` is either :math:`1` or :math:`-1` and the inner product
    simplifies to addition followed by scaling factor :math:`\\alpha = \\frac{1}{\\|\\mathbf{w}_j\\|_{\\ell_1}}`.
    The number of :math:`\\alpha` values equals the number of output maps of the affine function.

    References:

        Rastegari, Mohammad, et al. "XNOR-Net: ImageNet Classification Using
        Binary Convolutional Neural Networks." arXiv preprint
        arXiv:1603.05279 (2016).

    .. note::

        1) if you would like to share weights between some layers, please
        make sure to share the standard, floating value weights (`weight`)
        and not the binarized weights (`binary_weight`)

        2) The weights and the binary weights become synced only after :func:`~nnabla._variable.Variable.forward` is called,
        and not after a call to :func:`~nnabla._variable.Variable.backward`.
        To access the parameters of the network, remember to call :func:`~nnabla._variable.Variable.forward` once before doing so, otherwise the
        float weights and the binary weights will not be in sync.

        3) CPU and GPU implementations now use float value for `binary_weight`,
        since this function is only for simulation purposes.

    Args:
        inp (~nnabla.Variable): Input N-D array with shape (:math:`M_0 \\times \ldots \\times M_{B-1} \\times D_B \\times \ldots \\times D_N`). Dimensions before and after base_axis are flattened as if it was a matrix.
        n_outmaps (int or :obj:`tuple` of :obj:`int`): Number of output neurons per data.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for the weight.
        wb_init (~nnabla.initializer.BaseInitializer): Initializer for the binary weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for the bias.
        fix_parameters (bool): When set to `True`, the weight and bias will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`

    """
    if not hasattr(n_outmaps, '__iter__'):
        n_outmaps = [n_outmaps]
    n_outmaps = list(n_outmaps)
    n_outmap = int(np.prod(n_outmaps))
    if w_init is None:
        fan_in = np.prod(inp.shape[base_axis:])
        w_init = UniformInitializer(calc_uniform_lim_glorot(fan_in, n_outmap),
                                    rng=rng)
    if wb_init is None:
        fan_in = np.prod(inp.shape[base_axis:])
        wb_init = UniformInitializer(calc_uniform_lim_glorot(fan_in, n_outmap),
                                     rng=rng)
    if b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create("W", [int(np.prod(inp.shape[base_axis:]))] +
                                n_outmaps, w_init, not fix_parameters)
    wb = get_parameter_or_create("Wb", [int(np.prod(inp.shape[base_axis:]))] +
                                 n_outmaps, wb_init, not fix_parameters)
    alpha = get_parameter_or_create("alpha", n_outmaps, ConstantInitializer(0),
                                    False)
    b = None
    if with_bias:
        b = get_parameter_or_create("b", n_outmaps, b_init, not fix_parameters)
    return F.binary_weight_affine(inp, w, wb, alpha, b, base_axis)
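A minimal usage sketch, assuming this function is available as nnabla.parametric_functions.binary_weight_affine; the parameters W, Wb, alpha and b are created under the given scope:

import nnabla as nn
import nnabla.parametric_functions as PF

x = nn.Variable((16, 128))
with nn.parameter_scope("bw_fc1"):
    y = PF.binary_weight_affine(x, 10)
print(y.shape)                            # (16, 10)
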
Code Example #25
def binary_connect_affine(inp,
                          n_outmaps,
                          base_axis=1,
                          w_init=None,
                          wb_init=None,
                          b_init=None,
                          fix_parameters=False,
                          rng=None,
                          with_bias=True):
    """Binary Connect Affine, multiplier-less inner-product.

    Binary Connect Affine is an affine function,
    except the definition of the inner product is modified.
    The input-output relation of this function is as follows:

    .. math::

        y_j = \sum_{i} sign(w_{ji}) x_i.

    Therefore :math:`sign(w_{ji})` is either :math:`1` or :math:`-1` and the inner product
    simplifies to addition.

    This function should be used together with Batch Normalization.

    References:

        M. Courbariaux, Y. Bengio, and J.-P. David. "BinaryConnect:
        Training Deep Neural Networks with binary weights during propagations."
        Advances in Neural Information Processing Systems. 2015.

    .. note::

        1) if you would like to share weights between some layers, please
        make sure to share the standard, floating value weights (`weight`)
        and not the binarized weights (`binary_weight`)

        2) The weights and the binary weights become synced only after :func:`~nnabla._variable.Variable.forward` is called,
        and not after a call to :func:`~nnabla._variable.Variable.backward`.
        To access the parameters of the network, remember to call :func:`~nnabla._variable.Variable.forward` once before doing so, otherwise the
        float weights and the binary weights will not be in sync.

        3) CPU and GPU implementations now use float value for `binary_weight`,
        since this function is only for simulation purposes.

    Args:
        inp (~nnabla.Variable): Input N-D array with shape (:math:`M_0 \\times \ldots \\times M_{B-1} \\times D_B \\times \ldots \\times D_N`). Dimensions before and after base_axis are flattened as if it is a matrix.
        n_outmaps (int or :obj:`tuple` of :obj:`int`): Number of output neurons per data.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        wb_init (~nnabla.initializer.BaseInitializer): Initializer for binary weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        fix_parameters (bool): When set to `True`, the weights and biases will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`

    """
    if not hasattr(n_outmaps, '__iter__'):
        n_outmaps = [n_outmaps]
    n_outmaps = list(n_outmaps)
    n_outmap = int(np.prod(n_outmaps))
    if w_init is None:
        fan_in = np.prod(inp.shape[base_axis:])
        w_init = UniformInitializer(calc_uniform_lim_glorot(fan_in, n_outmap),
                                    rng=rng)
    if wb_init is None:
        fan_in = np.prod(inp.shape[base_axis:])
        wb_init = UniformInitializer(calc_uniform_lim_glorot(fan_in, n_outmap),
                                     rng=rng)
    if b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create("W", [int(np.prod(inp.shape[base_axis:]))] +
                                n_outmaps, w_init, not fix_parameters)
    wb = get_parameter_or_create("Wb", [int(np.prod(inp.shape[base_axis:]))] +
                                 n_outmaps, wb_init, not fix_parameters)
    b = None
    if with_bias:
        b = get_parameter_or_create("b", n_outmaps, b_init, not fix_parameters)
    return F.binary_connect_affine(inp, w, wb, b, base_axis)
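A hedged usage sketch for binary_connect_affine, following the docstring's advice to pair it with Batch Normalization; the imports, sizes and scope name are assumptions, not part of the original example.

import nnabla as nn
import nnabla.parametric_functions as PF

x = nn.Variable((32, 784))
with nn.parameter_scope("bc_fc1"):
    h = binary_connect_affine(x, 256)
    h = PF.batch_normalization(h, batch_stat=True)
h.forward()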
Code example #27
def lstm(x,
         mask,
         state_size,
         w_init=None,
         inner_w_init=None,
         forget_bias_init=I.ConstantInitializer(1),
         b_init=I.ConstantInitializer(0),
         initial_state=None,
         dropout=0,
         train=True,
         rng=np.random):
    """
    x: (batch_size, length, input_size)
    mask: (batch_size, length)
    """
    batch_size, length, input_size = x.shape

    if w_init is None:
        w_init = I.UniformInitializer(
            I.calc_uniform_lim_glorot(input_size, state_size))
    if inner_w_init is None:
        inner_w_init = orthogonal

    retain_prob = 1.0 - dropout
    z_w = nn.Variable((batch_size, 4, input_size), need_grad=False)
    z_w.d = 1
    z_u = nn.Variable((batch_size, 4, state_size), need_grad=False)
    z_u.d = 1

    if dropout > 0:
        if train:
            z_w = F.dropout(z_w, p=retain_prob)
            z_u = F.dropout(z_u, p=retain_prob)
        z_w *= retain_prob
        z_u *= retain_prob

    z_w = F.reshape(z_w, (batch_size, 4, 1, input_size))
    z_w = F.broadcast(z_w, (batch_size, 4, length, input_size))
    z_w = F.split(z_w, axis=1)
    z_u = F.split(z_u, axis=1)
    xi = z_w[0] * x
    xf = z_w[1] * x
    xc = z_w[2] * x
    xo = z_w[3] * x

    with nn.parameter_scope("lstm"):
        # (batch_size, length, state_size)
        xi = PF.affine(xi,
                       state_size,
                       base_axis=2,
                       w_init=w_init,
                       b_init=b_init,
                       name="Wi")
        xf = PF.affine(xf,
                       state_size,
                       base_axis=2,
                       w_init=w_init,
                       b_init=forget_bias_init,
                       name="Wf")
        xc = PF.affine(xc,
                       state_size,
                       base_axis=2,
                       w_init=w_init,
                       b_init=b_init,
                       name="Wc")
        xo = PF.affine(xo,
                       state_size,
                       base_axis=2,
                       w_init=w_init,
                       b_init=b_init,
                       name="Wo")

    if initial_state is None:
        h = nn.Variable((batch_size, state_size), need_grad=False)
        h.data.zero()
    else:
        h = initial_state
    c = nn.Variable((batch_size, state_size), need_grad=False)
    c.data.zero()

    # (batch_size, state_size)
    xi = split(xi, axis=1)
    xf = split(xf, axis=1)
    xc = split(xc, axis=1)
    xo = split(xo, axis=1)
    mask = F.reshape(mask, [batch_size, length, 1])  # (batch_size, length, 1)
    mask = F.broadcast(mask, [batch_size, length, state_size])
    # (batch_size, state_size)
    mask = split(mask, axis=1)

    hs = []
    cs = []
    with nn.parameter_scope("lstm"):
        for i, f, c2, o, m in zip(xi, xf, xc, xo, mask):
            i_t = PF.affine(z_u[0] * h,
                            state_size,
                            w_init=inner_w_init(state_size, state_size),
                            with_bias=False,
                            name="Ui")
            i_t = F.sigmoid(i + i_t)
            f_t = PF.affine(z_u[1] * h,
                            state_size,
                            w_init=inner_w_init(state_size, state_size),
                            with_bias=False,
                            name="Uf")
            f_t = F.sigmoid(f + f_t)
            c_t = PF.affine(z_u[2] * h,
                            state_size,
                            w_init=inner_w_init(state_size, state_size),
                            with_bias=False,
                            name="Uc")
            c_t = f_t * c + i_t * F.tanh(c2 + c_t)
            o_t = PF.affine(z_u[3] * h,
                            state_size,
                            w_init=inner_w_init(state_size, state_size),
                            with_bias=False,
                            name="Uo")
            o_t = F.sigmoid(o + o_t)
            h_t = o_t * F.tanh(c_t)

            h_t = (1 - m) * h + m * h_t
            c_t = (1 - m) * c + m * c_t
            h = h_t
            c = c_t
            h_t = F.reshape(h_t, (batch_size, 1, state_size), inplace=False)
            c_t = F.reshape(c_t, (batch_size, 1, state_size), inplace=False)
            hs.append(h_t)
            cs.append(c_t)
    return concatenate(*hs, axis=1), concatenate(*cs, axis=1)
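The shapes below sketch how the lstm() helper above could be driven. split, concatenate and orthogonal are assumed to be supplied by the surrounding module (e.g. aliases of F.split / F.concatenate and an orthogonal initializer factory), and the sizes are arbitrary.

import nnabla as nn

batch_size, length, input_size, state_size = 16, 20, 64, 128
x = nn.Variable((batch_size, length, input_size))
mask = nn.Variable((batch_size, length))   # 1 for valid time steps, 0 for padding
hs, cs = lstm(x, mask, state_size, dropout=0.1, train=True)
# hs and cs both have shape (batch_size, length, state_size)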
Code example #28
def cond_att_lstm(x,
                  parent_index,
                  mask,
                  context,
                  context_mask,
                  state_size,
                  att_hidden_size,
                  initial_state=None,
                  initial_cell=None,
                  hist=None,
                  dropout=0,
                  train=True,
                  w_init=None,
                  inner_w_init=None,
                  b_init=I.ConstantInitializer(0),
                  forget_bias_init=I.ConstantInitializer(1)):
    """
    x: (batch_size, length, input_size)
    parent_index: (batch_size, length)
    mask: (batch_size, length)
    context: (batch_size, context_length, context_size)
    context_mask: (batch_size, context_length)
    hist: (batch_size, l, state_size)
    """
    batch_size, length, input_size = x.shape
    _, context_length, context_size = context.shape

    if w_init is None:
        w_init = I.UniformInitializer(
            I.calc_uniform_lim_glorot(input_size, state_size))
    if inner_w_init is None:
        inner_w_init = orthogonal

    retain_prob = 1.0 - dropout
    z_w = nn.Variable((batch_size, 4, input_size), need_grad=False)
    z_w.d = 1
    z_u = nn.Variable((batch_size, 4, state_size), need_grad=False)
    z_u.d = 1

    if dropout > 0:
        if train:
            z_w = F.dropout(z_w, p=retain_prob)
            z_u = F.dropout(z_u, p=retain_prob)
        z_w *= retain_prob
        z_u *= retain_prob

    z_w = F.reshape(z_w, (batch_size, 4, 1, input_size))
    z_w = F.broadcast(z_w, (batch_size, 4, length, input_size))
    z_w = F.split(z_w, axis=1)
    z_u = F.split(z_u, axis=1)
    xi = z_w[0] * x
    xf = z_w[1] * x
    xc = z_w[2] * x
    xo = z_w[3] * x

    with nn.parameter_scope("cond_att_lstm"):
        # (batch_size, length, state_size)
        with nn.parameter_scope("lstm"):
            xi = PF.affine(
                xi,
                state_size,
                base_axis=2,
                w_init=w_init,
                b_init=b_init,
                name="Wi")
            xf = PF.affine(
                xf,
                state_size,
                base_axis=2,
                w_init=w_init,
                b_init=forget_bias_init,
                name="Wf")
            xc = PF.affine(
                xc,
                state_size,
                base_axis=2,
                w_init=w_init,
                b_init=b_init,
                name="Wc")
            xo = PF.affine(
                xo,
                state_size,
                base_axis=2,
                w_init=w_init,
                b_init=b_init,
                name="Wo")

        with nn.parameter_scope("context"):
            # context_att_trans: (batch_size, context_length, att_hidden_size)
            context_att_trans = PF.affine(
                context,
                att_hidden_size,
                base_axis=2,
                w_init=w_init,
                b_init=b_init,
                name="layer1_c")

    if initial_state is None:
        h = nn.Variable((batch_size, state_size), need_grad=False)
        h.data.zero()
    else:
        h = initial_state

    if initial_cell is None:
        c = nn.Variable((batch_size, state_size), need_grad=False)
        c.data.zero()
    else:
        c = initial_cell

    if hist is None:
        hist = nn.Variable((batch_size, 1, state_size), need_grad=False)
        hist.data.zero()

    # (batch_size, state_size)
    xi = split(xi, axis=1)
    xf = split(xf, axis=1)
    xc = split(xc, axis=1)
    xo = split(xo, axis=1)
    mask = F.reshape(mask, [batch_size, length, 1])  # (batch_size, length, 1)
    mask = F.broadcast(mask, [batch_size, length, state_size])
    # (batch_size, state_size)
    mask = split(mask, axis=1)
    # (batch_size, max_action_length)
    parent_index = parent_index + 1  # index == 0 means that parent is root
    # (batch_size)
    parent_index = split(parent_index, axis=1)

    hs = []
    cs = []
    ctx = []

    for i, f, c2, o, m, p in zip(xi, xf, xc, xo, mask, parent_index):
        h_num = hist.shape[1]
        with nn.parameter_scope("context"):
            h_att_trans = PF.affine(
                h,
                att_hidden_size,
                with_bias=False,
                w_init=w_init,
                name="layer1_h")  # (batch_size, att_hidden_size)
            h_att_trans = F.reshape(h_att_trans,
                                    (batch_size, 1, att_hidden_size))
            h_att_trans = F.broadcast(
                h_att_trans, (batch_size, context_length, att_hidden_size))
            att_hidden = F.tanh(context_att_trans + h_att_trans)
            att_raw = PF.affine(
                att_hidden, 1, base_axis=2, w_init=w_init,
                b_init=b_init)  # (batch_size, context_length, 1)
            att_raw = F.reshape(att_raw, (batch_size, context_length))
            ctx_att = F.exp(att_raw - F.max(att_raw, axis=1, keepdims=True))
            ctx_att = ctx_att * context_mask
            ctx_att = ctx_att / F.sum(ctx_att, axis=1, keepdims=True)
            ctx_att = F.reshape(ctx_att, (batch_size, context_length, 1))
            ctx_att = F.broadcast(ctx_att,
                                  (batch_size, context_length, context_size))
            ctx_vec = F.sum(
                context * ctx_att, axis=1)  # (batch_size, context_size)

        # parent_history
        p = F.reshape(p, (batch_size, 1))
        p = F.one_hot(p, (h_num, ))
        p = F.reshape(p, (batch_size, 1, h_num))
        par_h = F.batch_matmul(p, hist)  # [batch_size, 1, state_size]
        par_h = F.reshape(par_h, (batch_size, state_size))

        with nn.parameter_scope("lstm"):
            i_t = PF.affine(
                z_u[0] * h,
                state_size,
                w_init=inner_w_init(state_size, state_size),
                with_bias=False,
                name="Ui")
            i_t += PF.affine(
                ctx_vec,
                state_size,
                w_init=inner_w_init(context_size, state_size),
                with_bias=False,
                name="Ci")
            i_t += PF.affine(
                par_h,
                state_size,
                w_init=inner_w_init(state_size, state_size),
                with_bias=False,
                name="Pi")
            i_t = F.sigmoid(i + i_t)
            f_t = PF.affine(
                z_u[1] * h,
                state_size,
                w_init=inner_w_init(state_size, state_size),
                with_bias=False,
                name="Uf")
            f_t += PF.affine(
                ctx_vec,
                state_size,
                w_init=inner_w_init(context_size, state_size),
                with_bias=False,
                name="Cf")
            f_t += PF.affine(
                par_h,
                state_size,
                w_init=inner_w_init(state_size, state_size),
                with_bias=False,
                name="Pf")
            f_t = F.sigmoid(f + f_t)
            c_t = PF.affine(
                z_u[2] * h,
                state_size,
                w_init=inner_w_init(state_size, state_size),
                with_bias=False,
                name="Uc")
            c_t += PF.affine(
                ctx_vec,
                state_size,
                w_init=inner_w_init(context_size, state_size),
                with_bias=False,
                name="Cc")
            c_t += PF.affine(
                par_h,
                state_size,
                w_init=inner_w_init(state_size, state_size),
                with_bias=False,
                name="Pc")
            c_t = f_t * c + i_t * F.tanh(c2 + c_t)
            o_t = PF.affine(
                z_u[3] * h,
                state_size,
                w_init=inner_w_init(state_size, state_size),
                with_bias=False,
                name="Uo")
            o_t += PF.affine(
                ctx_vec,
                state_size,
                w_init=inner_w_init(context_size, state_size),
                with_bias=False,
                name="Co")
            o_t += PF.affine(
                par_h,
                state_size,
                w_init=inner_w_init(state_size, state_size),
                with_bias=False,
                name="Po")
            o_t = F.sigmoid(o + o_t)
            h_t = o_t * F.tanh(c_t)

            h_t = (1 - m) * h + m * h_t
            c_t = (1 - m) * c + m * c_t
            h = h_t
            c = c_t
            h_t = F.reshape(h_t, (batch_size, 1, state_size), inplace=False)
            c_t = F.reshape(c_t, (batch_size, 1, state_size), inplace=False)
            ctx_vec = F.reshape(
                ctx_vec, (batch_size, 1, context_size), inplace=False)
            hs.append(h_t)
            cs.append(c_t)
            ctx.append(ctx_vec)

            hist = F.concatenate(
                hist, h_t, axis=1)  # (batch_size, h_num + 1, state_size)

    return concatenate(
        *hs, axis=1), concatenate(
            *cs, axis=1), concatenate(
                *ctx, axis=1), hist
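A shape-only sketch of calling cond_att_lstm(); all tensors are placeholders and the sizes are arbitrary, chosen only to show which argument lines up with which dimension.

import nnabla as nn

batch_size, length, input_size = 16, 20, 64
context_length, context_size = 30, 96
state_size, att_hidden_size = 128, 64

x = nn.Variable((batch_size, length, input_size))
parent_index = nn.Variable((batch_size, length))
mask = nn.Variable((batch_size, length))
context = nn.Variable((batch_size, context_length, context_size))
context_mask = nn.Variable((batch_size, context_length))

hs, cs, ctx, hist = cond_att_lstm(x, parent_index, mask, context, context_mask,
                                  state_size, att_hidden_size)
# hs, cs: (batch_size, length, state_size)
# ctx:    (batch_size, length, context_size)
# hist:   (batch_size, length + 1, state_size)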
Code example #29
def cifar100_resnet23_prediction(image,
                                 ctx, test=False):
    """
    Construct ResNet 23
    """
    # Residual Unit
    def res_unit(x, scope_name, rng, dn=False, test=False):
        C = x.shape[1]
        with nn.parameter_scope(scope_name):

            # Conv -> BN -> Relu
            with nn.parameter_scope("conv1"):
                w_init = UniformInitializer(
                    calc_uniform_lim_glorot(C, C // 2, kernel=(1, 1)),
                    rng=rng)
                h = PF.convolution(x, C // 2, kernel=(1, 1), pad=(0, 0),
                                   w_init=w_init, with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN -> Relu
            with nn.parameter_scope("conv2"):
                w_init = UniformInitializer(
                    calc_uniform_lim_glorot(C // 2, C // 2, kernel=(3, 3)),
                    rng=rng)
                h = PF.convolution(h, C // 2, kernel=(3, 3), pad=(1, 1),
                                   w_init=w_init, with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN
            with nn.parameter_scope("conv3"):
                w_init = UniformInitializer(
                    calc_uniform_lim_glorot(C // 2, C, kernel=(1, 1)),
                    rng=rng)
                h = PF.convolution(h, C, kernel=(1, 1), pad=(0, 0),
                                   w_init=w_init, with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
            # Residual -> Relu
            h = F.relu(h + x)

            # Maxpooling
            if dn:
                h = F.max_pooling(h, kernel=(2, 2), stride=(2, 2))

            return h

    # Random generator for using the same init parameters in all devices
    rng = np.random.RandomState(0)
    nmaps = 384
    ncls = 100

    # Conv -> BN -> Relu
    with nn.context_scope(ctx):
        with nn.parameter_scope("conv1"):
            # Preprocess
            if not test:

                image = F.image_augmentation(image, contrast=1.0,
                                             angle=0.25,
                                             flip_lr=True)
                image.need_grad = False

            w_init = UniformInitializer(
                calc_uniform_lim_glorot(3, nmaps, kernel=(3, 3)),
                rng=rng)
            h = PF.convolution(image, nmaps, kernel=(3, 3), pad=(1, 1),
                               w_init=w_init, with_bias=False)
            h = PF.batch_normalization(h, batch_stat=not test)
            h = F.relu(h)

        h = res_unit(h, "conv2", rng, False)    # -> 32x32
        h = res_unit(h, "conv3", rng, True)     # -> 16x16
        h = res_unit(h, "conv4", rng, False)    # -> 16x16
        h = res_unit(h, "conv5", rng, True)     # -> 8x8
        h = res_unit(h, "conv6", rng, False)    # -> 8x8
        h = res_unit(h, "conv7", rng, True)     # -> 4x4
        h = res_unit(h, "conv8", rng, False)    # -> 4x4
        h = F.average_pooling(h, kernel=(4, 4))  # -> 1x1

        w_init = UniformInitializer(
            calc_uniform_lim_glorot(int(np.prod(h.shape[1:])), ncls, kernel=(1, 1)), rng=rng)
        pred = PF.affine(h, ncls, w_init=w_init)

    return pred
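One way to instantiate the graph above, assuming get_extension_context from nnabla.ext_utils is the context factory in use (a plain CPU context is shown; the batch size is illustrative).

import nnabla as nn
from nnabla.ext_utils import get_extension_context

ctx = get_extension_context("cpu")
image = nn.Variable((64, 3, 32, 32))       # CIFAR-100 mini-batch
pred = cifar100_resnet23_prediction(image, ctx, test=False)
# pred: (64, 100) class logits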
Code example #30
def cifar10_resnet23_prediction(image, ctx, test=False):
    """
    Construct ResNet 23
    """

    # Residual Unit
    def res_unit(x, scope_name, rng, dn=False, test=False):
        C = x.shape[1]
        with nn.parameter_scope(scope_name):

            # Conv -> BN -> Relu
            with nn.parameter_scope("conv1"):
                w_init = UniformInitializer(calc_uniform_lim_glorot(
                    C, C // 2, kernel=(1, 1)),
                                            rng=rng)
                h = PF.convolution(x,
                                   C // 2,
                                   kernel=(1, 1),
                                   pad=(0, 0),
                                   w_init=w_init,
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN -> Relu
            with nn.parameter_scope("conv2"):
                w_init = UniformInitializer(calc_uniform_lim_glorot(
                    C // 2, C // 2, kernel=(3, 3)),
                                            rng=rng)
                h = PF.convolution(h,
                                   C // 2,
                                   kernel=(3, 3),
                                   pad=(1, 1),
                                   w_init=w_init,
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN
            with nn.parameter_scope("conv3"):
                w_init = UniformInitializer(calc_uniform_lim_glorot(
                    C // 2, C, kernel=(1, 1)),
                                            rng=rng)
                h = PF.convolution(h,
                                   C,
                                   kernel=(1, 1),
                                   pad=(0, 0),
                                   w_init=w_init,
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
            # Residual -> Relu
            h = F.relu(h + x)

            # Maxpooling
            if dn:
                h = F.max_pooling(h, kernel=(2, 2), stride=(2, 2))

            return h

    # Random generator for using the same init parameters in all devices
    rng = np.random.RandomState(0)
    nmaps = 64
    ncls = 10

    # Conv -> BN -> Relu
    with nn.context_scope(ctx):
        with nn.parameter_scope("conv1"):
            # Preprocess
            if not test:

                image = F.image_augmentation(image,
                                             contrast=1.0,
                                             angle=0.25,
                                             flip_lr=True)
                image.need_grad = False

            w_init = UniformInitializer(calc_uniform_lim_glorot(3,
                                                                nmaps,
                                                                kernel=(3, 3)),
                                        rng=rng)
            h = PF.convolution(image,
                               nmaps,
                               kernel=(3, 3),
                               pad=(1, 1),
                               w_init=w_init,
                               with_bias=False)
            h = PF.batch_normalization(h, batch_stat=not test)
            h = F.relu(h)

        h = res_unit(h, "conv2", rng, False)  # -> 32x32
        h = res_unit(h, "conv3", rng, True)  # -> 16x16
        h = res_unit(h, "conv4", rng, False)  # -> 16x16
        h = res_unit(h, "conv5", rng, True)  # -> 8x8
        h = res_unit(h, "conv6", rng, False)  # -> 8x8
        h = res_unit(h, "conv7", rng, True)  # -> 4x4
        h = res_unit(h, "conv8", rng, False)  # -> 4x4
        h = F.average_pooling(h, kernel=(4, 4))  # -> 1x1

        w_init = UniformInitializer(calc_uniform_lim_glorot(int(
            np.prod(h.shape[1:])),
                                                            ncls,
                                                            kernel=(1, 1)),
                                    rng=rng)
        pred = PF.affine(h, ncls, w_init=w_init)

    return pred
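The CIFAR-10 variant differs from the CIFAR-100 one only in nmaps (64 vs. 384) and ncls (10 vs. 100). A GPU build might look like the sketch below; the availability of the 'cudnn' extension context is an assumption about the runtime.

import nnabla as nn
from nnabla.ext_utils import get_extension_context

ctx = get_extension_context("cudnn", device_id="0")
image = nn.Variable((64, 3, 32, 32))
pred = cifar10_resnet23_prediction(image, ctx, test=True)   # inference graph
# pred: (64, 10) class logits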
Code example #31
File: parametric_functions.py (Project: zwsong/nnabla)
def binary_weight_convolution(inp, outmaps, kernel,
                              pad=None, stride=None, dilation=None, group=1,
                              w_init=None, wb_init=None, b_init=None,
                              base_axis=1, fix_parameters=False, rng=None,
                              with_bias=True):
    """Binary Weight Convolution, multiplier-less inner-product with a scale factor.

    Binary Weight Convolution is the convolution function, but the
    inner product in this function is the following,

    .. math::

        y_{n, a, b} = \\frac{1}{\\|\\mathbf{w}_n\\|_{\\ell_1}} \sum_{m} \sum_{i} \sum_{j} sign(w_{n, m, i, j}) x_{m, a + i, b + j}.


    Therefore :math:`sign(w_{n, m, i, j})`  is either :math:`1` or :math:`-1` and the inner product
    simplifies to addition followed by scaling factor :math:`\\alpha = \\frac{1}{\\|\\mathbf{w}_n\\|_{\\ell_1}}`.
    The index :math:`n` runs over the outmaps of the convolution function,
    so there is one scaling factor per output map.

    References:

        Rastegari, Mohammad, et al. "XNOR-Net: ImageNet Classification Using
        Binary Convolutional Neural Networks." arXiv preprint
        arXiv:1603.05279 (2016).

    .. note::

        1) if you would like to share weights between some layers, please
        make sure to share the standard, floating value weights (`weight`)
        and not the binarized weights (`binary_weight`)

        2) The weights and the binary weights become synced only after :func:`~nnabla._variable.Variable.forward` is called,
        and not after a call to :func:`~nnabla._variable.Variable.backward`.
        To access the parameters of the network, remember to call :func:`~nnabla._variable.Variable.forward` once before doing so, otherwise the
        float weights and the binary weights will not be in sync.

        3) CPU and GPU implementations now use float value for `binary_weight`,
        since this function is only for simulation purposes.

    Args:
        inp (~nnabla.Variable): N-D array.
        outmaps (int): Number of convolution kernels (which is equal to the number of output channels). For example, to apply convolution on an input with 16 types of filters, specify 16.
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel size. For example, to apply convolution on an image with a 3 (height) by 5 (width) two-dimensional kernel, specify (3,5).
        pad (:obj:`tuple` of :obj:`int`): Padding sizes for dimensions.
        stride (:obj:`tuple` of :obj:`int`): Stride sizes for dimensions.
        dilation (:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions.
        group (int): Number of groups of channels. This makes connections across channels sparser by grouping connections along map direction.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        wb_init (~nnabla.initializer.BaseInitializer): Initializer for binary weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        fix_parameters (bool): When set to `True`, the weights and biases will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`

    """
    if w_init is None:
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(inp.shape[base_axis], outmaps, tuple(kernel)), rng=rng)
    if wb_init is None:
        wb_init = UniformInitializer(
            calc_uniform_lim_glorot(inp.shape[base_axis], outmaps, tuple(kernel)), rng=rng)
    if b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", (outmaps, inp.shape[base_axis]) + tuple(kernel),
        w_init, not fix_parameters)
    wb = get_parameter_or_create(
        "Wb", (outmaps, inp.shape[base_axis]) + tuple(kernel),
        wb_init, not fix_parameters)
    alpha = get_parameter_or_create(
        "alpha", (outmaps, ), ConstantInitializer(0), False)
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", (outmaps,), b_init, not fix_parameters)
    return F.binary_weight_convolution(inp, w, wb, alpha, b, base_axis, pad, stride, dilation, group)
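A minimal shape sketch for the layer above; the sizes and scope name are illustrative and the standard nnabla imports are assumed.

import nnabla as nn

x = nn.Variable((8, 3, 32, 32))
with nn.parameter_scope("bw_conv1"):
    h = binary_weight_convolution(x, 16, kernel=(3, 3), pad=(1, 1))
# h: (8, 16, 32, 32); parameters created: W, Wb, alpha, b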
Code example #32
def binary_weight_convolution(inp,
                              outmaps,
                              kernel,
                              pad=None,
                              stride=None,
                              dilation=None,
                              group=1,
                              w_init=None,
                              wb_init=None,
                              b_init=None,
                              base_axis=1,
                              fix_parameters=False,
                              rng=None,
                              with_bias=True):
    """Binary Weight Convolution, multiplier-less inner-product with a scale factor.

    Binary Weight Convolution is the convolution function, but the
    inner product in this function is the following,

    .. math::

        y_{n, a, b} = \\frac{1}{\\|\\mathbf{w}_n\\|_{\\ell_1}} \sum_{m} \sum_{i} \sum_{j} sign(w_{n, m, i, j}) x_{m, a + i, b + j}.


    Therefore :math:`sign(w_{n, m, i, j})`  is either :math:`1` or :math:`-1` and the inner product
    simplifies to addition followed by scaling factor :math:`\\alpha = \\frac{1}{\\|\\mathbf{w}_n\\|_{\\ell_1}}`.
    The index :math:`n` runs over the outmaps of the convolution function,
    so there is one scaling factor per output map.

    References:

        Rastegari, Mohammad, et al. "XNOR-Net: ImageNet Classification Using
        Binary Convolutional Neural Networks." arXiv preprint
        arXiv:1603.05279 (2016).

    .. note::

        1) if you would like to share weights between some layers, please
        make sure to share the standard, floating value weights (`weight`)
        and not the binarized weights (`binary_weight`)

        2) The weights and the binary weights become synced only after :func:`~nnabla._variable.Variable.forward` is called,
        and not after a call to :func:`~nnabla._variable.Variable.backward`.
        To access the parameters of the network, remember to call :func:`~nnabla._variable.Variable.forward` once before doing so, otherwise the
        float weights and the binary weights will not be in sync.

        3) CPU and GPU implementations now use float value for `binary_weight`,
        since this function is only for simulation purposes.

    Args:
        inp (~nnabla.Variable): N-D array.
        outmaps (int): Number of convolution kernels (which is equal to the number of output channels). For example, to apply convolution on an input with 16 types of filters, specify 16.
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel size. For example, to apply convolution on an image with a 3 (height) by 5 (width) two-dimensional kernel, specify (3,5).
        pad (:obj:`tuple` of :obj:`int`): Padding sizes for dimensions.
        stride (:obj:`tuple` of :obj:`int`): Stride sizes for dimensions.
        dilation (:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions.
        group (int): Number of groups of channels. This makes connections across channels sparser by grouping connections along map direction.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        wb_init (~nnabla.initializer.BaseInitializer): Initializer for binary weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        fix_parameters (bool): When set to `True`, the weights and biases will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`

    """
    if w_init is None:
        w_init = UniformInitializer(calc_uniform_lim_glorot(
            inp.shape[base_axis], outmaps, tuple(kernel)),
                                    rng=rng)
    if wb_init is None:
        wb_init = UniformInitializer(calc_uniform_lim_glorot(
            inp.shape[base_axis], outmaps, tuple(kernel)),
                                     rng=rng)
    if b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create("W", (outmaps, inp.shape[base_axis]) +
                                tuple(kernel), w_init, not fix_parameters)
    wb = get_parameter_or_create("Wb", (outmaps, inp.shape[base_axis]) +
                                 tuple(kernel), wb_init, not fix_parameters)
    alpha = get_parameter_or_create("alpha", (outmaps, ),
                                    ConstantInitializer(0), False)
    b = None
    if with_bias:
        b = get_parameter_or_create("b", (outmaps, ), b_init,
                                    not fix_parameters)
    return F.binary_weight_convolution(inp, w, wb, alpha, b, base_axis, pad,
                                       stride, dilation, group)
Code example #33
def quantized_affine(inp,
                     n_outmaps,
                     base_axis=1,
                     w_init=None,
                     b_init=None,
                     fix_parameters=False,
                     rng=None,
                     with_bias=True,
                     quantization_w=None,
                     quantization_b=None):
    """Quantized Affine.

    Quantized affine with

    .. math::

        y_j = \sum_{i} Q_w(w_{ji}) x_i + Q_b(b_j),

    where :math:`Q_w(.)` is the weight quantization function
    and :math:`Q_b(.)` is the bias quantization function.

    .. note::

        1) if you would like to share weights between some layers, please
        make sure to share the standard, floating value weights (`weight`)
        and not the quantized weights (`quantized weight`)

        2) The weights and the quantized weights become synced only after :func:`~nnabla._variable.Variable.forward` is called,
        and not after a call to :func:`~nnabla._variable.Variable.backward`.
        To access the parameters of the network, remember to call :func:`~nnabla._variable.Variable.forward` once before doing so, otherwise the
        float weights and the quantized weights will not be in sync.

        3) CPU and GPU implementations now use float value for `quantized weight`,
        since this function is only for simulation purposes.

    Args:
        inp (~nnabla.Variable): Input N-D array with shape (:math:`M_0 \\times \ldots \\times M_{B-1} \\times D_B \\times \ldots \\times D_N`). Dimensions before and after base_axis are flattened as if it is a matrix.
        n_outmaps (:obj:`int` or :obj:`tuple` of :obj:`int`): Number of output neurons per data.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        w_init (:obj:`nnabla.initializer.BaseInitializer` or :obj:`numpy.ndarray`): Initializer for weight.
        b_init (:obj:`nnabla.initializer.BaseInitializer` or :obj:`numpy.ndarray`): Initializer for bias.
        fix_parameters (bool): When set to `True`, the weights and biases will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.
        quantization_w (function): Quantization function that is applied to the weights.
            Use `None` to not quantize the weights.
        quantization_b (function): Quantization function that is applied to the bias.
            Use `None` to not quantize the bias.

    Returns:
        :class:`~nnabla.Variable`: :math:`(B + 1)`-D array. (:math:`M_0 \\times \ldots \\times M_{B-1} \\times L`)

    """

    if not hasattr(n_outmaps, '__iter__'):
        n_outmaps = [n_outmaps]
    n_outmaps = list(n_outmaps)
    n_outmap = int(np.prod(n_outmaps))
    if w_init is None:
        inmaps = np.prod(inp.shape[base_axis:])
        w_init = UniformInitializer(calc_uniform_lim_glorot(inmaps, n_outmap),
                                    rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()

    # Floating Weight
    w = get_parameter_or_create("W", [int(np.prod(inp.shape[base_axis:]))] +
                                n_outmaps, w_init, True, not fix_parameters)

    # Quantize weights
    if quantization_w is not None:
        w_q = get_parameter_or_create(
            "W_q", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps, w_init,
            False)
        # Link computation graph
        real_w_q = quantization_w(w)
        real_w_q.persistent = True

        w_q.data = real_w_q.data
    else:
        real_w_q = w

    # Bias
    # Floating
    b = None
    b_q = None
    real_b_q = None
    if with_bias:
        b = get_parameter_or_create("b", n_outmaps, b_init, True,
                                    not fix_parameters)
        if quantization_b is not None:
            b_q = get_parameter_or_create("b_q", n_outmaps, b_init, False)
            # Link computation graph
            real_b_q = quantization_b(b)
            real_b_q.persistent = True

            b_q.data = real_b_q.data
        else:
            real_b_q = b

    return F.affine(inp, real_w_q, real_b_q, base_axis)
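A sketch of plugging a quantizer into quantized_affine. The toy rounding quantizer below is an assumption built from elementary operations (F.round is assumed to be available as an element-wise rounding op); it is not one of the library's dedicated fixed-point quantization helpers.

import nnabla as nn
import nnabla.functions as F

def toy_quantize(v):
    # 4-bit-style uniform rounding, purely for illustration
    return F.round(v * 16.0) / 16.0

x = nn.Variable((32, 256))
with nn.parameter_scope("q_fc1"):
    y = quantized_affine(x, 10,
                         quantization_w=toy_quantize,
                         quantization_b=toy_quantize)
# y: (32, 10); W_q and b_q hold the quantized copies after y.forward()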
Code example #34
File: parametric_functions.py (Project: zwsong/nnabla)
def binary_connect_affine(inp, n_outmaps,
                          base_axis=1,
                          w_init=None, wb_init=None, b_init=None,
                          fix_parameters=False, rng=None, with_bias=True):
    """Binary Connect Affine, multiplier-less inner-product.

    Binary Connect Affine is an affine function,
    except the definition of the inner product is modified.
    The input-output relation of this function is as follows:

    .. math::

        y_j = \sum_{i} sign(w_{ji}) x_i.

    Therefore :math:`sign(w_{ji})` is either :math:`1` or :math:`-1` and the inner product
    simplifies to addition.

    This function should be used together with Batch Normalization.

    References:

        M. Courbariaux, Y. Bengio, and J.-P. David. "BinaryConnect:
        Training Deep Neural Networks with binary weights during propagations."
        Advances in Neural Information Processing Systems. 2015.

    .. note::

        1) if you would like to share weights between some layers, please
        make sure to share the standard, floating value weights (`weight`)
        and not the binarized weights (`binary_weight`)

        2) The weights and the binary weights become synced only after :func:`~nnabla._variable.Variable.forward` is called,
        and not after a call to :func:`~nnabla._variable.Variable.backward`.
        To access the parameters of the network, remember to call :func:`~nnabla._variable.Variable.forward` once before doing so, otherwise the
        float weights and the binary weights will not be in sync.

        3) CPU and GPU implementations now use float value for `binary_weight`,
        since this function is only for simulation purposes.

    Args:
        inp (~nnabla.Variable): Input N-D array with shape (:math:`M_0 \\times \ldots \\times M_{B-1} \\times D_B \\times \ldots \\times D_N`). Dimensions before and after base_axis are flattened as if it is a matrix.
        n_outmaps (int or :obj:`tuple` of :obj:`int`): Number of output neurons per data.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        wb_init (~nnabla.initializer.BaseInitializer): Initializer for binary weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        fix_parameters (bool): When set to `True`, the weights and biases will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`

    """
    if not hasattr(n_outmaps, '__iter__'):
        n_outmaps = [n_outmaps]
    n_outmaps = list(n_outmaps)
    n_outmap = int(np.prod(n_outmaps))
    if w_init is None:
        fan_in = np.prod(inp.shape[base_axis:])
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(fan_in, n_outmap), rng=rng)
    if wb_init is None:
        fan_in = np.prod(inp.shape[base_axis:])
        wb_init = UniformInitializer(
            calc_uniform_lim_glorot(fan_in, n_outmap), rng=rng)
    if b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps,
        w_init, not fix_parameters)
    wb = get_parameter_or_create(
        "Wb", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps,
        wb_init, not fix_parameters)
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", n_outmaps, b_init, not fix_parameters)
    return F.binary_connect_affine(inp, w, wb, b, base_axis)
Code example #35
File: models.py (Project: sony/nnabla-examples)
 def last_affine(self, x, dims, name):
     c = x.shape[1]
     l, u = I.calc_uniform_lim_glorot(c, 1)
     w_init = I.UniformInitializer((l, u))
     return PF.affine(x, 1, w_init=w_init, name=name)
Code example #36
File: model.py (Project: TengHu/nnabla-examples)
def capsule_layer(u, num_j=10, out_channels=16, num_routing_iter=3, grad_dynamic_routing=False, fix_parameters=False):
    '''
    Takes the PrimaryCapsules output and produces DigitCapsules.

    Args:
        u (nnabla.Variable): Input of shape [B, in_capsules, in_channels].
        num_j (int): Number of output capsules.
        out_channels (int): Number of units in each capsule of the output.
        num_routing_iter (int): Dynamic routing iterations.
        grad_dynamic_routing (bool): If False, it doesn't compute gradients of
            dynamic routing coefficients as if they are given as
            hyperparameters.
        fix_parameters (bool): Fix parameters (Set need_grad=False).

    Returns:
        tuple of nn.Variable: u_hat of shape [B, num_j, in_capsules, out_channels]
            and the routed output v of shape [B, num_j, out_channels].

    '''
    assert num_routing_iter > 0
    batch_size = u.shape[0]
    num_i = u.shape[1]  # 32 * 6 * 6
    in_channels = u.shape[2]

    # Routing u_hat = W u in eq 2.
    # Implementing with broadcast and batch_matmul. Maybe not efficient.

    # Create a parameter tensor
    # Note: Consider num input channels multiplied by num input capsules
    from nnabla.initializer import UniformInitializer, calc_uniform_lim_glorot
    from nnabla.parameter import get_parameter_or_create
    w_init = UniformInitializer(
        calc_uniform_lim_glorot(num_i * in_channels, out_channels))
    w_ij = get_parameter_or_create(
        "W", (1, num_j, num_i, in_channels, out_channels), w_init, not fix_parameters)
    # Tiling w_ij to [batch_size, num_j, num_i, in_channels, out_channels].
    w_ij_tiled = F.broadcast(w_ij, (batch_size,) + w_ij.shape[1:])
    # Tiling u to [batch_size, num_j, num_i, 1, in_channels].
    u = u.reshape((batch_size, 1, num_i, 1, in_channels))
    u_tiled = F.broadcast(u, (batch_size, num_j, num_i, 1, in_channels))
    # Apply batched matrix multiplication:
    # [1, in_channels] * [in_channels, out_channels] --> [1, out_channels]
    # u_hat shape: [batch_size, num_j, num_i, out_channels]
    u_hat = F.batch_matmul(u_tiled, w_ij_tiled).reshape(
        (batch_size, num_j, num_i, out_channels))

    # Dynamic Routing iteration doesn't compute gradients.
    # u_hat only used at the final step of computation of s.
    u_hat_no_grad = u_hat
    if not grad_dynamic_routing:
        u_hat_no_grad = F.identity(u_hat)
        u_hat_no_grad.need_grad = False

    # Dynamic routing described in Procedure 1.
    b = F.constant(0, (batch_size, num_j, num_i, 1))
    for r in range(num_routing_iter):
        # u_hat is only used in the last step.
        uh = u_hat_no_grad
        if r == num_routing_iter - 1:
            uh = u_hat

        # 4: Softmax in eq 3
        c = F.softmax(b, axis=1)
        # 5: Left of eq 2. s shape: [B, num_j, out_channels]
        s = F.sum(c * uh, axis=2)
        # 6: eq 1
        v = squash(s, axis=2)
        if r == num_routing_iter - 1:
            return u_hat, v
        # 7: Update by agreement
        b = b + F.sum(v.reshape((batch_size, num_j, 1, out_channels)) *
                      uh, axis=3, keepdims=True)
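A shape sketch for capsule_layer(). squash() is assumed to be the usual squashing non-linearity defined elsewhere in the same module, and the input mimics a PrimaryCapsules output of 32 x 6 x 6 capsules with 8 channels each.

import nnabla as nn

u = nn.Variable((32, 32 * 6 * 6, 8))       # [B, in_capsules, in_channels]
u_hat, v = capsule_layer(u, num_j=10, out_channels=16, num_routing_iter=3)
# u_hat: (32, 10, 1152, 16), v: (32, 10, 16)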
Code example #37
def quantized_convolution(inp,
                          outmaps,
                          kernel,
                          pad=None,
                          stride=None,
                          dilation=None,
                          group=1,
                          w_init=None,
                          b_init=None,
                          base_axis=1,
                          fix_parameters=False,
                          rng=None,
                          with_bias=True,
                          quantization_w=None,
                          quantization_b=None):
    """Quantized Convolution.

    Quantized Convolution where the input/output
    relationship is

    .. math::

        y_{n, a, b} = \sum_{m} \sum_{i} \sum_{j} Q_w(w_{n, m, i, j}) x_{m, a + i, b + j} + Q_b(b_n), 

    where :math:`Q_w(w_{n, m, i, j})` is the weight quantization function
    and :math:`Q_b(b_{n})` is the bias quantization function.

    .. note::

        1) if you would like to share weights between some layers, please
        make sure to share the standard, floating value weights (`weight`)
        and not the quantized weights (`quantized weight`)

        2) The weights and the quantized weights become synced only after :func:`~nnabla._variable.Variable.forward` is called,
        and not after a call to :func:`~nnabla._variable.Variable.backward`.
        To access the parameters of the network, remember to call :func:`~nnabla._variable.Variable.forward` once before doing so, otherwise the
        float weights and the quantized weights will not be in sync.

        3) CPU and GPU implementations now use float value for `quantized weight`,
        since this function is only for simulation purposes.

    Args:
        inp (~nnabla.Variable): N-D array.
        outmaps (int): Number of convolution kernels (which is equal to the number of output channels). For example, to apply convolution on an input with 16 types of filters, specify 16.
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel size. For example, to apply convolution on an image with a 3 (height) by 5 (width) two-dimensional kernel, specify (3,5).
        pad (:obj:`tuple` of :obj:`int`): Padding sizes for dimensions.
        stride (:obj:`tuple` of :obj:`int`): Stride sizes for dimensions.
        dilation (:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions.
        group (int): Number of groups of channels. This makes connections across channels more sparse by grouping connections along map direction.
        w_init (:obj:`nnabla.initializer.BaseInitializer` or :obj:`numpy.ndarray`): Initializer for weight.
        b_init (:obj:`nnabla.initializer.BaseInitializer` or :obj:`numpy.ndarray`): Initializer for bias.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        fix_parameters (bool): When set to `True`, the weights and biases will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.
        quantization_w (function): Quantization function that is applied to the weights.
            Use `None` to not quantize the weights.
        quantization_b (function): Quantization function that is applied to the bias.
            Use `None` to not quantize the bias.

    Returns:
        :class:`~nnabla.Variable`: N-D array.

    """
    if w_init is None:
        w_init = UniformInitializer(calc_uniform_lim_glorot(
            inp.shape[base_axis], outmaps, tuple(kernel)),
                                    rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()

    # Floating Weight
    w = get_parameter_or_create("W", (outmaps, inp.shape[base_axis] // group) +
                                tuple(kernel), w_init, True,
                                not fix_parameters)

    # Quantize weights
    if quantization_w is not None:
        w_q = get_parameter_or_create(
            "W_q", (outmaps, inp.shape[base_axis] // group) + tuple(kernel),
            w_init, False)
        # Link computation graph
        real_w_q = quantization_w(w)
        real_w_q.persistent = True

        w_q.data = real_w_q.data
    else:
        real_w_q = w

    # Bias
    # Floating
    b = None
    b_q = None
    real_b_q = None
    if with_bias:
        b = get_parameter_or_create("b", (outmaps, ), b_init, True,
                                    not fix_parameters)
        if quantization_b is not None:
            b_q = get_parameter_or_create("b_q", (outmaps, ), b_init, False)
            # Link computation graph
            real_b_q = quantization_b(b)
            real_b_q.persistent = True

            b_q.data = real_b_q.data
        else:
            real_b_q = b

    return F.convolution(inp, real_w_q, real_b_q, base_axis, pad, stride,
                         dilation, group)
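The same toy quantizer idea applies to quantized_convolution; again the rounding function is only an illustrative stand-in, not the library's dedicated quantization API.

import nnabla as nn
import nnabla.functions as F

def toy_quantize(v):
    return F.round(v * 16.0) / 16.0

x = nn.Variable((8, 3, 32, 32))
with nn.parameter_scope("q_conv1"):
    y = quantized_convolution(x, 16, kernel=(3, 3), pad=(1, 1),
                              quantization_w=toy_quantize,
                              quantization_b=toy_quantize)
# y: (8, 16, 32, 32)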
Code example #38
File: parametric_functions.py (Project: zwsong/nnabla)
def binary_weight_affine(inp, n_outmaps,
                         base_axis=1,
                         w_init=None, wb_init=None, b_init=None,
                         fix_parameters=False, rng=None, with_bias=True):
    """Binary Weight Affine, multiplier-less inner-product with a scale factor.

    Binary Weight Affine is the affine function, but the inner product
    in this function is the following,

    .. math::

        y_j = \\frac{1}{\\|\\mathbf{w}_j\\|_{\\ell_1}} \sum_{i} sign(w_{ji}) x_i

    Therefore :math:`sign(w_{ji})` is either :math:`1` or :math:`-1` and the inner product
    simplifies to addition followed by scaling factor :math:`\\alpha = \\frac{1}{\\|\\mathbf{w}_j\\|_{\\ell_1}}`.
    The number of :math:`\\alpha` values equals the number of outmaps
    of the affine function.

    References:

        Rastegari, Mohammad, et al. "XNOR-Net: ImageNet Classification Using
        Binary Convolutional Neural Networks." arXiv preprint
        arXiv:1603.05279 (2016).

    .. note::

        1) if you would like to share weights between some layers, please
        make sure to share the standard, floating value weights (`weight`)
        and not the binarized weights (`binary_weight`)

        2) The weights and the binary weights become synced only after :func:`~nnabla._variable.Variable.forward` is called,
        and not after a call to :func:`~nnabla._variable.Variable.backward`.
        To access the parameters of the network, remember to call :func:`~nnabla._variable.Variable.forward` once before doing so, otherwise the
        float weights and the binary weights will not be in sync.

        3) CPU and GPU implementations now use float value for `binary_weight`,
        since this function is only for simulation purposes.

    Args:
        inp (~nnabla.Variable): Input N-D array with shape (:math:`M_0 \\times \ldots \\times M_{B-1} \\times D_B \\times \ldots \\times D_N`). Dimensions before and after base_axis are flattened as if it was a matrix.
        n_outmaps (int or :obj:`tuple` of :obj:`int`): Number of output neurons per data.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for the weight.
        wb_init (~nnabla.initializer.BaseInitializer): Initializer for the binary weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for the bias.
        fix_parameters (bool): When set to `True`, the weight and bias will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`

    """
    if not hasattr(n_outmaps, '__iter__'):
        n_outmaps = [n_outmaps]
    n_outmaps = list(n_outmaps)
    n_outmap = int(np.prod(n_outmaps))
    if w_init is None:
        fan_in = np.prod(inp.shape[base_axis:])
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(fan_in, n_outmap), rng=rng)
    if wb_init is None:
        fan_in = np.prod(inp.shape[base_axis:])
        wb_init = UniformInitializer(
            calc_uniform_lim_glorot(fan_in, n_outmap), rng=rng)
    if b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps,
        w_init, not fix_parameters)
    wb = get_parameter_or_create(
        "Wb", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps,
        wb_init, not fix_parameters)
    alpha = get_parameter_or_create(
        "alpha", n_outmaps, ConstantInitializer(0), False)
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", n_outmaps, b_init, not fix_parameters)
    return F.binary_weight_affine(inp, w, wb, alpha, b, base_axis)