Example #1
def INByBatchNorm(inp,
                  axes=[1],
                  decay_rate=0.9,
                  eps=1e-5,
                  fix_parameters=True):
    """Instance Normalization (implemented using BatchNormalization)
    Instance normalization is equivalent to batch normalization when the batch
    size is one; in other words, it normalizes over the spatial dimension(s),
    i.e. all dimensions except the batch and feature dimensions.
    """
    assert len(axes) == 1

    shape_stat = [1 for _ in inp.shape]
    shape_stat[axes[0]] = inp.shape[axes[0]]
    beta = get_parameter_or_create("beta", shape_stat, ConstantInitializer(0),
                                   not fix_parameters)
    gamma = get_parameter_or_create("gamma", shape_stat,
                                    ConstantInitializer(1), not fix_parameters)
    mean = get_parameter_or_create("mean", shape_stat, ConstantInitializer(0),
                                   False)
    var = get_parameter_or_create("var", shape_stat, ConstantInitializer(0),
                                  False)
    return F.batch_normalization(inp,
                                 beta,
                                 gamma,
                                 mean,
                                 var,
                                 axes,
                                 decay_rate,
                                 eps,
                                 batch_stat=True,
                                 output_stat=False)
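
A minimal usage sketch for INByBatchNorm, assuming `nnabla` is imported as `nn` and the parameter/initializer helpers used above are in scope; the scope name is illustrative. With a batch size of one, batch normalization with `batch_stat=True` reduces exactly to instance normalization.

x = nn.Variable((1, 32, 16, 16))  # batch size 1, so batch norm acts as instance norm
with nn.parameter_scope("in_bn1"):
    y = INByBatchNorm(x)
print(y.shape)  # (1, 32, 16, 16)
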
    def __call__(self, x):
        if not isinstance(x, nn._variable.Variable):
            input_variable = nn.Variable(x.shape)
            if isinstance(x, np.ndarray):
                input_variable.d = x
            else:
                input_variable.data = x
        else:
            input_variable = x

        features = self.backbone_model(input_variable,
                                       test=not self.training,
                                       channel_last=self.channel_last)

        output = []
        for head in sorted(self.heads):
            num_output = self.heads[head]
            if self.head_conv > 0:
                with nn.parameter_scope(head + "_conv1"):
                    b_init_param = -2.19 if head == 'hm' else 0.0
                    w_init_param = torch_initializer(
                        features.shape[self.axes],
                        (3, 3)) if head == 'hm' else self.n_init
                    out = pf_convolution(
                        features,
                        self.head_conv, (3, 3),
                        pad=(1, 1),
                        stride=(1, 1),
                        w_init=w_init_param,
                        b_init=ConstantInitializer(b_init_param),
                        with_bias=True,
                        channel_last=self.channel_last)
                    out = F.relu(out, inplace=True)
                with nn.parameter_scope(head + "_final"):
                    w_init_param = torch_initializer(
                        features.shape[self.axes],
                        (1, 1)) if head == 'hm' else self.n_init
                    out = pf_convolution(
                        out,
                        num_output, (1, 1),
                        pad=(0, 0),
                        stride=(1, 1),
                        w_init=w_init_param,
                        b_init=ConstantInitializer(b_init_param),
                        with_bias=True,
                        channel_last=self.channel_last)
            else:
                with nn.parameter_scope(head + "_final"):
                    w_init_param = torch_initializer(
                        features.shape[self.axes],
                        (1, 1)) if head == 'hm' else self.n_init
                    out = pf_convolution(features,
                                         num_output, (1, 1),
                                         pad=(0, 0),
                                         stride=(1, 1),
                                         w_init=w_init_param,
                                         with_bias=True,
                                         channel_last=self.channel_last)
            output.append(out)
        return output
def parametric_fixed_point_quantize_b_xmax(x,
                                           sign=True,
                                           n_init=8,
                                           n_min=2,
                                           n_max=16,
                                           xmax_init=1,
                                           xmax_min=0.001,
                                           xmax_max=10,
                                           fix_parameters=False):
    """Parametric version of `fixed_point_quantize` where the
    bitwidth `b` and dynamic range `xmax` are learnable parameters.

    Returns:
        ~nnabla.Variable: N-D array.
    """
    def clip_scalar(v, min_value, max_value):
        return F.minimum_scalar(F.maximum_scalar(v, min_value), max_value)

    def broadcast_scalar(v, shape):
        return F.broadcast(F.reshape(v, (1, ) * len(shape), inplace=False),
                           shape=shape)

    def quantize_pow2(v):
        return 2**F.round(F.log(v) / np.log(2.))

    n = get_parameter_or_create("n", (),
                                ConstantInitializer(n_init),
                                need_grad=True,
                                as_need_grad=not fix_parameters)
    xmax = get_parameter_or_create("xmax", (),
                                   ConstantInitializer(xmax_init),
                                   need_grad=True,
                                   as_need_grad=not fix_parameters)

    # ensure that bitwidth is in specified range and an integer
    n = F.round(clip_scalar(n, n_min, n_max))
    if sign:
        n = n - 1

    # ensure that dynamic range is in specified range
    xmax = clip_scalar(xmax, xmax_min, xmax_max)

    # compute step size from dynamic range and make sure that it is a pow2
    d = quantize_pow2(xmax / (2**n - 1))

    # compute min/max value that we can represent
    if sign:
        xmin = -xmax
    else:
        xmin = nn.Variable((1, ), need_grad=False)
        xmin.d = 0.

    # broadcast variables to correct size
    d = broadcast_scalar(d, shape=x.shape)
    xmin = broadcast_scalar(xmin, shape=x.shape)
    xmax = broadcast_scalar(xmax, shape=x.shape)

    # apply fixed-point quantization
    return d * F.round(F.clip_by_value(x, xmin, xmax) / d)
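
A minimal usage sketch, assuming `nnabla` is imported as `nn`, `numpy` as `np`, and the parameter/initializer helpers used above are available; the scope name is illustrative. The learnable bitwidth `n` and dynamic range `xmax` are created under the current parameter scope and trained with the rest of the network.

x = nn.Variable((8, 16))
x.d = np.random.randn(8, 16)
with nn.parameter_scope("q_w"):
    x_q = parametric_fixed_point_quantize_b_xmax(x, sign=True, n_init=8,
                                                 xmax_init=1.0)
x_q.forward()
print(x_q.shape)            # (8, 16)
print(nn.get_parameters())  # includes 'q_w/n' and 'q_w/xmax'
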
Example #4
def inq_convolution(inp, outmaps, kernel,
                    pad=None, stride=None, dilation=None, group=1,
                    num_bits=4, inq_iterations=(), selection_algorithm='random',
                    seed=-1, w_init=None, i_init=None, b_init=None,
                    base_axis=1, fix_parameters=False, rng=None,
                    with_bias=True):
    """Incremental Network Quantization Convolution Layer

    During training, the weights are sequentially quantized to power-of-two
    values, which allows the training of a multiplierless network.

    Using `inq_iterations`, one can specify after how many forward passes
    half of the learnable weights are fixed and quantized to powers-of-two.
    After reaching the last value in `inq_iterations`, all weights are fixed.

    For more details, please refer to the reference.

    Reference:
    Zhou A, Yao A, Guo Y, Xu L, Chen Y. Incremental network quantization:
    Towards lossless CNNs with low-precision weights.
    <https://arxiv.org/abs/1702.03044>

    Args:
        inp (~nnabla.Variable): Input N-D array with shape (:math:`M_0 \\times \\ldots \\times M_{B-1} \\times D_B \\times \\ldots \\times D_N`).
        outmaps (int): Number of convolution kernels (which is equal to the number of output channels).
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel size, e.g. (3, 3).
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        num_bits (int): Number of bits per weight. Value has to be larger than 1 as one bit is already used to code the value "0"
        inq_iterations (tuple of int): Tuple of iteration numbers at which we fix half of the weights.
        selection_algorithm (str): Chooses algorithm that is used to decide which weights are fixed. ("largest_abs" ... fix weights with largest absolute value, "random" ... fix weights randomly)
        seed (int): Random seed for INQ algorithm
        w_init (~nnabla.initializer.BaseInitializer): Initializer for the weight.
        i_init (~nnabla.initializer.BaseInitializer): Initializer for the indicators (0 ... learnable, 1 ... fixed).
        b_init (~nnabla.initializer.BaseInitializer): Initializer for the bias.
        fix_parameters (bool): When set to `True`, the weight and bias will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`

    """
    if w_init is None:
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(inp.shape[base_axis], outmaps, tuple(kernel)), rng=rng)
    if i_init is None:
        i_init = ConstantInitializer()
    if b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", (outmaps, inp.shape[base_axis]) + tuple(kernel),
        w_init, not fix_parameters)
    i = get_parameter_or_create(
        "I", (outmaps, inp.shape[base_axis]) + tuple(kernel),
        i_init, False)
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", (outmaps,), b_init, not fix_parameters)
    return F.inq_convolution(inp, w, i, b, base_axis, pad, stride, dilation, group, num_bits, inq_iterations, selection_algorithm, seed)
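
A minimal usage sketch, assuming `nnabla` is imported as `nn` and the initializer/parameter helpers used above are in scope; the scope name and iteration schedule are illustrative.

x = nn.Variable((4, 3, 32, 32))
with nn.parameter_scope("inq_conv1"):
    y = inq_convolution(x, outmaps=16, kernel=(3, 3), pad=(1, 1),
                        num_bits=4, inq_iterations=(5000, 10000, 15000),
                        selection_algorithm='largest_abs')
print(y.shape)  # (4, 16, 32, 32)
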
Example #5
    def __call__(self, x, axes=[1], training=True, name=''):
        shape = [1] * x.ndim
        m = nn.parameter.get_parameter_or_create('m-{}'.format(name), shape,
                                                 ConstantInitializer())
        M = nn.parameter.get_parameter_or_create('M-{}'.format(name), shape,
                                                 ConstantInitializer())
        y = self._function(x, m, M, training=training)
        return y
Example #6
def CCBN(h,
         y,
         n_classes,
         decay_rate=0.999,
         test=False,
         fix_parameters=False,
         coefs=[1.0]):
    """Categorical Conditional Batch Normaliazation"""
    # Call the batch normalization once
    shape_stat = [1 for _ in h.shape]
    shape_stat[1] = h.shape[1]
    gamma_tmp = nn.Variable.from_numpy_array(np.ones(shape_stat))
    beta_tmp = nn.Variable.from_numpy_array(np.zeros(shape_stat))
    mean = get_parameter_or_create("mean", shape_stat,
                                   ConstantInitializer(0.0), False)
    var = get_parameter_or_create("var", shape_stat, ConstantInitializer(1.0),
                                  False)
    h = F.batch_normalization(h,
                              beta_tmp,
                              gamma_tmp,
                              mean,
                              var,
                              decay_rate=decay_rate,
                              batch_stat=not test)

    # Condition the gamma and beta with the class label
    b, c = h.shape[0:2]

    def embed_func(y, initializer):
        if type(y) != list:
            o = embed(y,
                      n_classes,
                      c,
                      initializer=initializer,
                      sn=False,
                      test=test)
        else:
            y_list = y
            o = reduce(lambda x, y: x + y, [
                coef * embed(y,
                             n_classes,
                             c,
                             initializer=initializer,
                             sn=False,
                             test=test) for coef, y in zip(coefs, y_list)
            ])
        return o

    with nn.parameter_scope("gamma"):
        gamma = embed_func(y, ConstantInitializer(1.0))
        gamma = F.reshape(gamma, [b, c] + [1 for _ in range(len(h.shape[2:]))])
        gamma = F.broadcast(gamma, h.shape)
    with nn.parameter_scope("beta"):
        beta = embed_func(y, ConstantInitializer(0.0))
        beta = F.reshape(beta, [b, c] + [1 for _ in range(len(h.shape[2:]))])
        beta = F.broadcast(beta, h.shape)
    return gamma * h + beta
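
A minimal usage sketch, assuming `nnabla` is imported as `nn` and that the external `embed` helper called by CCBN is importable; shapes and the scope name are illustrative. `y` holds integer class labels that condition the per-channel gamma and beta.

h = nn.Variable((8, 64, 16, 16))
y = nn.Variable((8,))  # integer class labels
with nn.parameter_scope("ccbn1"):
    out = CCBN(h, y, n_classes=10, test=False)
print(out.shape)  # (8, 64, 16, 16)
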
Example #7
def LN(inp, fix_parameters=False):
    """Layer normalization.
    """
    beta_shape = (1, inp.shape[1], 1, 1)
    gamma_shape = (1, inp.shape[1], 1, 1)
    beta = get_parameter_or_create("beta", beta_shape, ConstantInitializer(0),
                                   not fix_parameters)
    gamma = get_parameter_or_create("gamma", gamma_shape,
                                    ConstantInitializer(1), not fix_parameters)
    return f_layer_normalization(inp, beta, gamma)
Example #8
    def __init__(self,
                 n_features,
                 n_dims,
                 axes=[1],
                 decay_rate=0.9,
                 eps=1e-5,
                 output_stat=False,
                 fix_parameters=False,
                 param_init=None,
                 name=''):
        Module.__init__(self, name=name)
        self._scope_name = f'<batchnorm at {hex(id(self))}>'

        assert len(axes) == 1

        shape_stat = [1 for _ in range(n_dims)]
        shape_stat[axes[0]] = n_features

        if param_init is None:
            param_init = {}
        beta_init = param_init.get('beta', ConstantInitializer(0))
        gamma_init = param_init.get('gamma', ConstantInitializer(1))
        mean_init = param_init.get('mean', ConstantInitializer(0))
        var_init = param_init.get('var', ConstantInitializer(1))

        if fix_parameters:
            self._beta = nn.Variable.from_numpy_array(beta_init(shape_stat))
            self._gamma = nn.Variable.from_numpy_array(gamma_init(shape_stat))
        else:
            self._beta = Parameter(shape_stat,
                                   initializer=beta_init,
                                   scope=self._scope_name)
            self._gamma = Parameter(shape_stat,
                                    initializer=gamma_init,
                                    scope=self._scope_name)

        self._mean = Parameter(shape_stat,
                               need_grad=False,
                               initializer=mean_init,
                               scope=self._scope_name)
        self._var = Parameter(shape_stat,
                              need_grad=False,
                              initializer=var_init,
                              scope=self._scope_name)
        self._axes = axes
        self._decay_rate = decay_rate
        self._eps = eps
        self._n_features = n_features
        self._fix_parameters = fix_parameters
        self._output_stat = output_stat
Example #9
def batch_normalization(inp,
                        axes=[1],
                        decay_rate=0.9,
                        eps=1e-5,
                        batch_stat=True,
                        output_stat=False):
    """
    Batch normalization layer.

    .. math::
        \\begin{array}{lcl}
        \\mu &=& \\frac{1}{M} \\sum x_i\\\\
        \\sigma^2 &=& \\frac{1}{M} \\sum \\left(x_i - \\mu\\right)^2\\\\
        \\hat{x}_i &=& \\frac{x_i - \\mu}{\\sqrt{\\sigma^2 + \\epsilon}} \\\\
        y_i &=& \\hat{x}_i \\gamma + \\beta.
        \\end{array}

    where :math:`x_i` and :math:`y_i` are the input and output, respectively, and :math:`M` is the number of elements the statistics are computed over.
    At test time, the mean and variance computed by the moving average during training are used instead.

    Args:
        inp (~nnabla.Variable): N-D array of input.
        axes (:obj:`tuple` of :obj:`int`): Axes over which the mean and variance are computed.
        decay_rate (float): Decay rate of running mean and variance.
        eps (float): Tiny value to avoid zero division by std.
        batch_stat (bool): Use mini-batch statistics rather than running ones.
        output_stat (bool): Output batch mean and variance.

    Returns:
        :class:`~nnabla.Variable`: N-D array.

    References:

        - Ioffe and Szegedy, Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift. https://arxiv.org/abs/1502.03167

    """
    assert len(axes) == 1
    shape_stat = [1 for _ in inp.shape]
    shape_stat[axes[0]] = inp.shape[axes[0]]
    beta = get_parameter_or_create("beta", shape_stat, ConstantInitializer(0),
                                   True)
    gamma = get_parameter_or_create("gamma", shape_stat,
                                    ConstantInitializer(1), True)
    mean = get_parameter_or_create("mean", shape_stat, ConstantInitializer(0),
                                   False)
    var = get_parameter_or_create("var", shape_stat, ConstantInitializer(0),
                                  False)
    return F.batch_normalization(inp, beta, gamma, mean, var, axes, decay_rate,
                                 eps, batch_stat, output_stat)
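
A minimal usage sketch, assuming `nnabla` is imported as `nn`; the scope name is illustrative. Reusing the same parameter scope with `batch_stat=False` builds the inference-time graph on top of the stored running statistics.

x = nn.Variable((16, 32, 8, 8))
with nn.parameter_scope("bn1"):
    y_train = batch_normalization(x, axes=[1], batch_stat=True)   # training graph
with nn.parameter_scope("bn1"):
    y_test = batch_normalization(x, axes=[1], batch_stat=False)   # inference graph
print(y_train.shape, y_test.shape)  # (16, 32, 8, 8) (16, 32, 8, 8)
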
Example #10
def mlp_gradient_synthesizer(x, y=None, test=False):
    maps = x.shape[1]
    if y is not None:
        h = F.one_hot(y, (10, ))
        h = F.concatenate(*[x, h], axis=1)
    else:
        h = x
    with nn.parameter_scope("gs"):
        h = act_bn_linear(h, maps, test, name="fc0")
        h = act_bn_linear(h, maps, test, name="fc1")
        w_init = ConstantInitializer(0)
        b_init = ConstantInitializer(0)
        g_pred = PF.affine(h, maps, w_init=w_init, b_init=b_init, name="fc")
        g_pred.persistent = True
    return g_pred
Example #11
    def __init__(self,
                 n_inmaps,
                 n_outmaps,
                 base_axis=1,
                 w_init=None,
                 b_init=None,
                 fix_parameters=False,
                 rng=None,
                 with_bias=True):
        if not hasattr(n_outmaps, '__iter__'):
            n_outmaps = [n_outmaps]
        n_outmaps = list(n_outmaps)
        n_outmap = int(np.prod(n_outmaps))
        if w_init is None:
            w_init = UniformInitializer(calc_uniform_lim_glorot(
                n_inmaps, n_outmap),
                                        rng=rng)
        if with_bias and b_init is None:
            b_init = ConstantInitializer()
        w_shape = (n_inmaps, n_outmap)
        w = nn.Variable.from_numpy_array(
            w_init(w_shape)).apply(need_grad=not fix_parameters)
        b = None
        if with_bias:
            b_shape = (n_outmap, )
            b = nn.Variable.from_numpy_array(
                b_init(b_shape)).apply(need_grad=not fix_parameters)

        self.W = w
        self.b = b
        self.base_axis = base_axis
Example #12
def convolution(inp,
                outmaps,
                kernel,
                pad=None,
                stride=None,
                dilation=None,
                group=1,
                itr=1,
                w_init=None,
                b_init=None,
                base_axis=1,
                fix_parameters=False,
                rng=None,
                with_bias=True,
                sn=True,
                test=False,
                init_scale=1.0):
    """
    """
    if w_init is None:
        l, u = calc_uniform_lim_glorot(inp.shape[base_axis], outmaps,
                                       tuple(kernel))
        l, u = init_scale * l, init_scale * u
        w_init = UniformInitializer((l, u), rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create("W", (outmaps, inp.shape[base_axis] // group) +
                                tuple(kernel), w_init, not fix_parameters)
    w_sn = spectral_normalization_for_conv(w, itr=itr, test=test) if sn else w
    b = None
    if with_bias:
        b = get_parameter_or_create("b", (outmaps, ), b_init,
                                    not fix_parameters)
    return F.convolution(inp, w_sn, b, base_axis, pad, stride, dilation, group)
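
A minimal usage sketch, assuming `nnabla` is imported as `nn` and the `spectral_normalization_for_conv` helper referenced above is importable; the scope name is illustrative.

x = nn.Variable((8, 3, 64, 64))
with nn.parameter_scope("sn_conv1"):
    y = convolution(x, 64, kernel=(4, 4), pad=(1, 1), stride=(2, 2),
                    sn=True, test=False)
print(y.shape)  # (8, 64, 32, 32)
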
Example #13
    def __init__(self,
                 in_features,
                 out_features,
                 base_axis=1,
                 w_init=None,
                 b_init=None,
                 rng=None,
                 bias=True,
                 name=''):
        Module.__init__(self, name=name)
        self._scope_name = f'<linear at {hex(id(self))}>'

        if w_init is None:
            w_init = UniformInitializer(calc_uniform_lim_glorot(
                in_features, out_features),
                                        rng=rng)
        self._W = Parameter((in_features, out_features),
                            initializer=w_init,
                            scope=self._scope_name)
        self._b = None

        if bias:
            if b_init is None:
                b_init = ConstantInitializer()
            self._b = Parameter((out_features, ),
                                initializer=b_init,
                                scope=self._scope_name)

        self._base_axis = base_axis
        self._in_features = in_features
        self._out_features = out_features
Example #14
    def __init__(self, inmaps, outmaps, kernel,
                 pad=None, stride=None, dilation=None, group=1,
                 w_init=None, b_init=None,
                 base_axis=1, fix_parameters=False, rng=None, with_bias=True):
        if w_init is None:
            w_init = UniformInitializer(
                calc_uniform_lim_glorot(inmaps, outmaps, tuple(kernel)), rng=rng)
        if with_bias and b_init is None:
            b_init = ConstantInitializer()
        w_shape = (outmaps, inmaps // group) + tuple(kernel)
        w = nn.Variable.from_numpy_array(
            w_init(w_shape)).apply(need_grad=not fix_parameters)
        b = None
        if with_bias:
            b_shape = (outmaps, )
            b = nn.Variable.from_numpy_array(
                b_init(b_shape)).apply(need_grad=not fix_parameters)

        self.W = w
        self.b = b
        self.base_axis = base_axis
        self.pad = pad
        self.stride = stride
        self.dilation = dilation
        self.group = group
Example #15
def attnblock(h, r=8, fix_parameters=False, sn=True, test=False):
    """Attention block"""
    x = h

    # 1x1 convolutions
    b, c, s0, s1 = h.shape
    c_r = c // r
    assert c_r > 0
    f_x = convolution(h, c_r, kernel=(1, 1), pad=(0, 0), stride=(1, 1), name="f",
                      with_bias=False, sn=sn, test=test)
    g_x = convolution(h, c_r, kernel=(1, 1), pad=(0, 0), stride=(1, 1), name="g",
                      with_bias=False, sn=sn, test=test)
    h_x = convolution(h, c, kernel=(1, 1), pad=(0, 0), stride=(1, 1), name="h",
                      with_bias=False, sn=sn, test=test)

    # Attend
    attn = F.batch_matmul(f_x.reshape(
        [b, c_r, -1]), g_x.reshape([b, c_r, -1]), transpose_a=True)
    attn = F.softmax(attn, 1)
    h_x = h_x.reshape([b, c, -1])
    o = F.batch_matmul(h_x, attn)
    o = F.reshape(o, [b, c, s0, s1])

    # Shortcut
    gamma = get_parameter_or_create(
        "gamma", [1, 1, 1, 1], ConstantInitializer(0.), not fix_parameters)
    y = gamma * o + x
    return y
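
A minimal usage sketch, assuming `nnabla` is imported as `nn` and that the spectral-norm `convolution` wrapper called by attnblock (one accepting a `name` argument) is importable; the scope name is illustrative. The output keeps the input shape; only the learnable shortcut weight `gamma` and the three 1x1 convolutions add parameters.

h = nn.Variable((4, 64, 16, 16))
with nn.parameter_scope("attn1"):
    y = attnblock(h, r=8, sn=True, test=False)
print(y.shape)  # (4, 64, 16, 16)
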
Example #16
def BN(inp, axes=[1], decay_rate=0.9, eps=1e-5,
       batch_stat=True, output_stat=False, fix_parameters=False):
    """Batch Normalization
    """
    shape_stat = [1 for _ in inp.shape]
    shape_stat[axes[0]] = inp.shape[axes[0]]
    beta = get_parameter_or_create(
        "beta", shape_stat, ConstantInitializer(0), not fix_parameters)
    gamma = get_parameter_or_create(
        "gamma", shape_stat, ConstantInitializer(1), not fix_parameters)
    mean = get_parameter_or_create(
        "mean", shape_stat, ConstantInitializer(0), False)
    var = get_parameter_or_create(
        "var", shape_stat, ConstantInitializer(0), False)
    return F.batch_normalization(inp, beta, gamma, mean, var, axes,
                                 decay_rate, eps, batch_stat, output_stat)
Example #17
def affine(inp, n_outmaps,
           base_axis=1,
           w_init=None, b_init=None,
           itr=1,
           fix_parameters=False, rng=None, with_bias=True,
           sn=True, test=False):
    """
    """
    if not hasattr(n_outmaps, '__iter__'):
        n_outmaps = [n_outmaps]
    n_outmaps = list(n_outmaps)
    n_outmap = int(np.prod(n_outmaps))
    if w_init is None:
        inmaps = np.prod(inp.shape[base_axis:])
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(inmaps, n_outmap), rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps,
        w_init, not fix_parameters)
    w_sn = spectral_normalization_for_affine(
        w, itr=itr, test=test) if sn else w
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", n_outmaps, b_init, not fix_parameters)
    return F.affine(inp, w_sn, b, base_axis)
Example #18
    def modify(self, f, inputs):
        fname = f.info.type_name
        if fname not in self._fct_set:
            return

        # Skip unless the previous or the next function is BatchNormalization
        next_func = f.outputs[0].function_references[0]
        prev_func = f.inputs[0].parent
        if (prev_func is None
                or prev_func.info.type_name != 'BatchNormalization') \
                and next_func.info.type_name != 'BatchNormalization':
            return

        x = inputs[0]
        w = inputs[1]
        b = inputs[2] if len(inputs) == 3 else None

        if b is not None:
            return

        scope = self.get_parameter_scope(w)
        n_outmaps = w.shape[1] if fname == 'Affine' else w.shape[0]
        with nn.parameter_scope(scope):
            b = get_parameter_or_create('b', (n_outmaps, ),
                                        ConstantInitializer(), True, True)
        h = self.connect(f, x, w, b)
        return h
Example #19
def conv(inp,
         outmaps,
         kernel,
         pad=None,
         stride=None,
         dilation=None,
         group=1,
         w_init=None,
         b_init=None,
         base_axis=1,
         fix_parameters=False,
         rng=None,
         with_bias=True,
         use_wscale=True,
         use_he_backward=False):
    """
    """
    # Use He backward
    if use_he_backward:
        std = calc_normal_std_he_backward(inp.shape[base_axis],
                                          outmaps,
                                          kernel=kernel)
    else:
        std = calc_normal_std_he_forward(inp.shape[base_axis],
                                         outmaps,
                                         kernel=kernel)

    # W init
    if w_init is None and use_wscale:
        # Equalized Learning Rate
        w_init = NormalInitializer(1.)
        w = get_parameter_or_create(
            "W", (outmaps, inp.shape[base_axis] / group) + tuple(kernel),
            w_init, not fix_parameters)
        w *= std
    elif w_init is None and not use_wscale:
        w_init = NormalInitializer(std)
        w = get_parameter_or_create(
            "W", (outmaps, inp.shape[base_axis] / group) + tuple(kernel),
            w_init, not fix_parameters)
    else:
        if w_init is None:
            w_init = UniformInitializer(calc_uniform_lim_glorot(
                inp.shape[base_axis], outmaps, tuple(kernel)),
                                        rng=rng)
        w = get_parameter_or_create(
            "W", (outmaps, inp.shape[base_axis] / group) + tuple(kernel),
            w_init, not fix_parameters)

    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    b = None
    if with_bias:
        b = get_parameter_or_create("b", (outmaps, ), b_init,
                                    not fix_parameters)

    return F.convolution(inp, w, b, base_axis, pad, stride, dilation, group)
Example #20
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel,
                 pad=None,
                 stride=None,
                 dilation=None,
                 group=1,
                 w_init=None,
                 b_init=None,
                 base_axis=1,
                 fix_parameters=False,
                 rng=None,
                 with_bias=True,
                 channel_last=False,
                 name=''):

        Module.__init__(self, name=name)
        self._scope_name = f'<convolution at {hex(id(self))}>'

        if w_init is None:
            w_init = UniformInitializer(calc_uniform_lim_glorot(
                in_channels, out_channels, tuple(kernel)),
                                        rng=rng)

        w_shape = (out_channels, in_channels // group) + tuple(kernel)
        b_shape = (out_channels, )

        self._b = None
        if with_bias and b_init is None:
            b_init = ConstantInitializer()

        if fix_parameters:
            self._W = nn.Variable.from_numpy_array(w_init(w_shape))
            if with_bias:
                self._b = nn.Variable.from_numpy_array(b_init(b_shape))
        else:
            self._W = Parameter(w_shape,
                                initializer=w_init,
                                scope=self._scope_name)
            if with_bias:
                self._b = Parameter(b_shape,
                                    initializer=b_init,
                                    scope=self._scope_name)

        self._base_axis = base_axis
        self._pad = pad
        self._stride = stride
        self._dilation = dilation
        self._group = group
        self._kernel = kernel
        self._in_channels = in_channels
        self._out_channels = out_channels
        self._channel_last = channel_last
        self._fix_parameters = fix_parameters
        self._rng = rng
Example #21
def _init_beta_gamma(shape, fix_parameters, param_init, no_bias, no_scale):
    from nnabla.parameter import get_parameter_or_create
    from nnabla.initializer import ConstantInitializer

    if no_bias:
        beta = None
    else:
        beta_init = param_init.get('beta', ConstantInitializer(0))
        beta = get_parameter_or_create("beta", shape, beta_init, True,
                                       not fix_parameters)

    if no_scale:
        gamma = None
    else:
        gamma_init = param_init.get('gamma', ConstantInitializer(1))
        gamma = get_parameter_or_create("gamma", shape, gamma_init, True,
                                        not fix_parameters)

    return beta, gamma
Example #22
def _get_generator(proto):
    if proto.type == 'Normal':
        return NormalInitializer(sigma=proto.multiplier)
    elif proto.type == 'Uniform':
        return UniformInitializer(lim=(-proto.multiplier, proto.multiplier))
    elif proto.type == 'Constant':
        return ConstantInitializer(value=proto.multiplier)
    else:
        raise ValueError('Generator type "' +
                         proto.type + '" is not supported.')
Example #23
def _create_variable(v, name, shape):
    # Create and initialize variables
    class Variable:
        pass

    parameter = v.type == "Parameter"
    variable_instance = None
    if parameter:
        if v.initializer.type == 'Normal':
            initializer = NormalInitializer(v.initializer.multiplier)
        elif v.initializer.type == 'NormalAffineHe' or v.initializer.type == 'NormalAffineHeForward':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_forward(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalAffineHeBackward':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_backward(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalAffineGlorot':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_glorot(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalConvolutionHe' or v.initializer.type == 'NormalConvolutionHeForward':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_forward(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalConvolutionHeBackward':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_backward(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalConvolutionGlorot':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_glorot(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'Uniform':
            initializer = UniformInitializer(
                lim=[-v.initializer.multiplier, v.initializer.multiplier])
        elif v.initializer.type == 'UniformAffineGlorot':
            initializer = (lambda shape: UniformInitializer(calc_uniform_lim_glorot(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'UniformConvolutionGlorot':
            initializer = (lambda shape: UniformInitializer(calc_uniform_lim_glorot(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'Constant':
            initializer = ConstantInitializer(value=v.initializer.multiplier)
        else:
            initializer = None
        variable_instance = get_parameter_or_create(name, shape, initializer)
    else:
        # create empty variable, memory will be allocated in network.setup()
        # after network optimization
        variable_instance = nn.Variable()

    variable = Variable()
    variable.name = name
    variable.parameter = parameter
    variable.shape = shape
    variable.variable_instance = variable_instance

    return variable
Example #24
def convolution(inp,
                outmaps,
                kernel,
                pad=None,
                stride=None,
                dilation=None,
                group=1,
                w_init=None,
                b_init=None,
                base_axis=1,
                fix_parameters=False,
                rng=None,
                with_bias=True):
    """
    N-D Convolution with a bias term.

    For Dilated Convolution (a.k.a. Atrous Convolution), refer to:

    - Chen et al., DeepLab: Semantic Image Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs. https://arxiv.org/abs/1606.00915

    - Yu et al., Multi-Scale Context Aggregation by Dilated Convolutions. https://arxiv.org/abs/1511.07122

    Args:
        inp (~nnabla.Variable): N-D array.
        outmaps (int): Number of convolution kernels (which is equal to the number of output channels). For example, to apply convolution on an input with 16 types of filters, specify 16.
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel size. For example, to apply convolution on an image with a 3 (height) by 5 (width) two-dimensional kernel, specify (3,5).
        pad (:obj:`tuple` of :obj:`int`): Padding sizes for dimensions.
        stride (:obj:`tuple` of :obj:`int`): Stride sizes for dimensions.
        dilation (:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions.
        group (int): Number of groups of channels. This makes connections across channels more sparse by grouping connections along map direction.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        fix_parameters (bool): When set to `True`, the weights and biases will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`: N-D array.

    """
    if w_init is None:
        w_init = UniformInitializer(calc_uniform_lim_glorot(
            inp.shape[base_axis], outmaps, tuple(kernel)),
                                    rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create("W", (outmaps, inp.shape[base_axis] / group) +
                                tuple(kernel), w_init, not fix_parameters)
    b = None
    if with_bias:
        b = get_parameter_or_create("b", (outmaps, ), b_init,
                                    not fix_parameters)
    return F.convolution(inp, w, b, base_axis, pad, stride, dilation, group)
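
A minimal usage sketch, assuming `nnabla` is imported as `nn`; the scope name is illustrative.

x = nn.Variable((1, 3, 224, 224))
with nn.parameter_scope("conv1"):
    y = convolution(x, 64, kernel=(7, 7), pad=(3, 3), stride=(2, 2))
print(y.shape)  # (1, 64, 112, 112)
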
Example #25
def affine(inp,
           n_outmaps,
           base_axis=1,
           w_init=None,
           b_init=None,
           fix_parameters=False,
           rng=None,
           with_bias=True,
           use_wscale=True,
           use_he_backward=False):
    """
    """
    if not hasattr(n_outmaps, '__iter__'):
        n_outmaps = [n_outmaps]
    n_outmaps = list(n_outmaps)
    n_outmap = int(np.prod(n_outmaps))

    # Use He backward
    if use_he_backward:
        std = calc_normal_std_he_backward(inp.shape[base_axis], n_outmap)
    else:
        std = calc_normal_std_he_forward(inp.shape[base_axis], n_outmap)

    # W init
    if w_init is None and use_wscale:
        # Equalized Learning Rate
        w_init = NormalInitializer(1.)
        w = get_parameter_or_create(
            "W", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps, w_init,
            not fix_parameters)
        w *= std
    elif w_init is None and not use_wscale:
        w_init = NormalInitializer(std)
        w = get_parameter_or_create(
            "W", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps, w_init,
            not fix_parameters)
    else:
        if w_init is None:
            w_init = UniformInitializer(calc_uniform_lim_glorot(
                inp.shape[base_axis], n_outmaps),
                                        rng=rng)
        w = get_parameter_or_create(
            "W", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps, w_init,
            not fix_parameters)

    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    b = None
    if with_bias:
        b = get_parameter_or_create("b", n_outmaps, b_init, not fix_parameters)

    return F.affine(inp, w, b, base_axis)
Example #26
def noisy_layer(x, out_size, name):
    inpt_size = x.shape[1]
    root_p = np.sqrt(inpt_size)
    mu_init = UniformInitializer((-1.0 / root_p, 1.0 / root_p))
    sig_init = ConstantInitializer(0.5 / root_p)
    eps_w, eps_b = sample_noise(inpt_size, out_size)
    with nn.parameter_scope(name):
        mu_w = get_parameter_or_create('mu_w', (inpt_size, out_size), mu_init)
        sig_w = get_parameter_or_create('sig_w', (inpt_size, out_size),
                                        sig_init)
        mu_b = get_parameter_or_create('mu_b', (out_size, ), mu_init)
        sig_b = get_parameter_or_create('sig_b', (out_size, ), sig_init)
    return F.affine(x, mu_w + sig_w * eps_w, mu_b + sig_b * eps_b)
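
The `sample_noise` helper above is external to this snippet. A plausible sketch, shown only as an assumption about what it might look like, is the factorized-Gaussian scheme from the NoisyNet paper (f(x) = sign(x)·sqrt(|x|)), which produces noise matching the shapes `noisy_layer` expects:

import numpy as np
import nnabla as nn

def sample_noise(inpt_size, out_size):
    # Hypothetical factorized-Gaussian noise (NoisyNet-style); not the original helper.
    def f(x):
        return np.sign(x) * np.sqrt(np.abs(x))
    noise_in = f(np.random.randn(inpt_size, 1))
    noise_out = f(np.random.randn(1, out_size))
    eps_w = nn.Variable.from_numpy_array(noise_in * noise_out)         # (inpt_size, out_size)
    eps_b = nn.Variable.from_numpy_array(noise_out.reshape(out_size))  # (out_size,)
    return eps_w, eps_b
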
Example #27
def masked_convolution(inp,
                       outmaps,
                       kernel,
                       pad=None,
                       stride=None,
                       dilation=None,
                       group=1,
                       w_init=None,
                       b_init=None,
                       base_axis=1,
                       fix_parameters=False,
                       rng=None,
                       with_bias=True):
    """
    """
    if w_init is None:
        w_init = UniformInitializer(calc_uniform_lim_glorot(
            inp.shape[base_axis], outmaps, tuple(kernel)),
                                    rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create("W", (outmaps, inp.shape[base_axis] / group) +
                                tuple(kernel), w_init, not fix_parameters)
    mask_w = get_parameter_or_create("Mw", w.shape, ConstantInitializer(0.),
                                     False)
    w_masked = w * mask_w
    b = None
    b_masked = None
    if with_bias:
        b = get_parameter_or_create("b", (outmaps, ), b_init,
                                    not fix_parameters)
        mask_b = get_parameter_or_create("Mb", b.shape,
                                         ConstantInitializer(0.), False)
        b_masked = b * mask_b

    return F.convolution(inp, w_masked, b_masked, base_axis, pad, stride,
                         dilation, group)
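
A minimal usage sketch, assuming `nnabla` is imported as `nn`; the scope name is illustrative. Since the masks `Mw`/`Mb` are created as all zeros, they need to be set (for example by a pruning step or a loaded checkpoint) before the layer produces a non-zero output.

x = nn.Variable((2, 16, 8, 8))
with nn.parameter_scope("masked_conv1"):
    y = masked_convolution(x, 16, kernel=(3, 3), pad=(1, 1))
params = nn.get_parameters(grad_only=False)
params["masked_conv1/Mw"].d = 1.0  # keep all weights
print(y.shape)  # (2, 16, 8, 8)
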
Example #28
def cnn_gradient_synthesizer(x, y=None, test=False):
    bs = x.shape[0]
    maps = x.shape[1]
    s0, s1 = x.shape[2:]
    if y is not None:
        h = F.one_hot(y, (10, ))
        h = F.reshape(h, (bs, 10, 1, 1))
        h = F.broadcast(h, (bs, 10, s0, s1))
        h = F.concatenate(*[x, h], axis=1)
    else:
        h = x
    with nn.parameter_scope("gs"):
        h = act_bn_conv(h, maps, test, name="conv0")
        w_init = ConstantInitializer(0)
        b_init = ConstantInitializer(0)
        g_pred = PF.convolution(h,
                                maps,
                                kernel=(3, 3),
                                pad=(1, 1),
                                w_init=w_init,
                                b_init=b_init,
                                name="conv")
        g_pred.persistent = True
    return g_pred
Example #29
def IN(inp, axes=[1], decay_rate=0.9, eps=1e-5, fix_parameters=True):
    """Instance Normalization
    """
    if inp.shape[0] == 1:
        return INByBatchNorm(inp, axes, decay_rate, eps, fix_parameters)

    b, c = inp.shape[0:2]
    spatial_shape = inp.shape[2:]

    shape_stat = [1 for _ in inp.shape]
    shape_stat[axes[0]] = inp.shape[axes[0]]
    beta = get_parameter_or_create("beta", shape_stat, ConstantInitializer(0),
                                   not fix_parameters)
    gamma = get_parameter_or_create("gamma", shape_stat,
                                    ConstantInitializer(1), not fix_parameters)

    # Instance normalization
    # normalize over spatial dimensions
    axis = [i for i in range(len(inp.shape)) if i > 1]
    n = float(np.prod(spatial_shape))  # number of elements reduced over
    mean = F.sum(inp, axis=axis, keepdims=True) / n
    var = F.sum(F.pow_scalar(inp - mean, 2.0), axis=axis,
                keepdims=True) / n
    h = (inp - mean) / F.pow_scalar(var + eps, 0.5)
    return gamma * h + beta
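
A minimal usage sketch, assuming `nnabla` is imported as `nn` and the helpers used above are in scope; the scope name is illustrative. With a batch size larger than one the statistics are computed per sample over the spatial axes; with a batch size of one the call falls back to `INByBatchNorm`.

x = nn.Variable((4, 32, 16, 16))
with nn.parameter_scope("in1"):
    y = IN(x, fix_parameters=False)
print(y.shape)  # (4, 32, 16, 16)
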
Example #30
    def __init__(self,
                 n_features,
                 n_dims,
                 axes=[1],
                 decay_rate=0.9,
                 eps=1e-5,
                 batch_stat=True,
                 output_stat=False,
                 fix_parameters=False,
                 param_init=None):
        assert len(axes) == 1
        shape_stat = [1 for _ in range(n_dims)]
        shape_stat[axes[0]] = n_features

        if param_init is None:
            param_init = {}
        beta_init = param_init.get('beta', ConstantInitializer(0))
        gamma_init = param_init.get('gamma', ConstantInitializer(1))
        mean_init = param_init.get('mean', ConstantInitializer(0))
        var_init = param_init.get('var', ConstantInitializer(1))

        beta = nn.Variable.from_numpy_array(
            beta_init(shape_stat)).apply(need_grad=not fix_parameters)
        gamma = nn.Variable.from_numpy_array(
            gamma_init(shape_stat)).apply(need_grad=not fix_parameters)
        mean = nn.Variable.from_numpy_array(mean_init(shape_stat))
        var = nn.Variable.from_numpy_array(var_init(shape_stat))

        self.beta = beta
        self.gamma = gamma
        self.mean = mean
        self.var = var
        self.axes = axes
        self.decay_rate = decay_rate
        self.eps = eps
        self.output_stat = output_stat