Example #1
def weight_normalization_backward(inputs, dim=0, eps=1e-12):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    w = inputs[1]
    g = inputs[2]
    g_shape = g.shape
    dim += w.ndim*(dim < 0)

    # Create inverted norm of w
    sum_axes = list(filter(lambda x: x != dim, range(w.ndim)))
    w_pow = F.pow_scalar(w, 2.0)
    w_sum = F.sum(w_pow, sum_axes, True)
    w_add = F.add_scalar(w_sum, eps)
    w_norm_inv = F.pow_scalar(w_add, -0.5)

    dyw_sum = F.sum(dy * w, sum_axes, True)

    # w.r.t. dw
    g = g.reshape([s if i == dim else 1 for i, s in enumerate(w.shape)])
    dw = (dy - dyw_sum * (w_norm_inv ** 2) * w) * g * w_norm_inv

    # w.r.t. dg
    dg = dyw_sum * w_norm_inv
    dg = dg.reshape(g_shape)

    return dw, dg
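
A minimal smoke test for the helper above, assuming `F` is `nnabla.functions` and that the inputs follow the usual `[dy, w, g]` layout of nnabla's backward functions; the shapes below are made up for illustration.

import numpy as np
import nnabla as nn
import nnabla.functions as F

# Hypothetical shapes: w is (outmaps, inmaps), g holds one scale per output map.
w = nn.Variable.from_numpy_array(np.random.randn(8, 4))
g = nn.Variable.from_numpy_array(np.random.randn(8))
dy = nn.Variable.from_numpy_array(np.random.randn(8, 4))  # incoming gradient

dw, dg = weight_normalization_backward([dy, w, g], dim=0)
F.sink(dw, dg).forward()  # evaluate both gradient graphs at once
print(dw.d.shape, dg.d.shape)  # -> (8, 4) (8,)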
Example #2
    def backward_impl(self, inputs, outputs, prop_down, accum):
        # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
        # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

        # Inputs
        x0 = inputs[0].data
        dy = inputs[1].data
        # Outputs
        dx0 = outputs[0].data
        # Grads of inputs
        g_x0 = inputs[0].grad
        g_dy = inputs[1].grad
        # Grads of outputs
        g_dx0 = outputs[0].grad

        # Computation
        if prop_down[0]:
            if accum[0]:
                g_x0 -= g_dx0 * dy * F.pow_scalar(x0, -2.0)
            else:
                g_x0.copy_from(-g_dx0 * dy * F.pow_scalar(x0, -2.0))
        if prop_down[1]:
            inp = nn.Variable(x0.shape).apply(data=x0,
                                              grad=g_dy,
                                              need_grad=True)
            out = nn.Variable(dy.shape).apply(grad=g_dx0)
            self.forward_func.backward([inp], [out], accum=[accum[1]])
Example #3
    def backward_impl(self, inputs, outputs, prop_down, accum):
        # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
        # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

        # Args
        axis = self.forward_func.info.args["axis"]

        # Inputs
        y0 = inputs[0].data
        dz = inputs[2].data
        # Outputs
        dy0 = outputs[0].data
        # Grads of inputs
        g_y0 = inputs[0].grad
        g_dz = inputs[2].grad
        # Grads of outputs
        g_dy0 = outputs[0].grad

        # Computation
        if prop_down[0]:
            if accum[0]:
                g_y0 += g_dy0 * dz * F.pow_scalar(y0, -2.0)
            else:
                g_y0.copy_from(g_dy0 * dz * F.pow_scalar(y0, -2.0))
        if prop_down[2]:
            if accum[2]:
                g_dz -= F.sum(g_dy0 * F.pow_scalar(y0, -1.0), axis, True)
            else:
                g_dz.copy_from(-F.sum(g_dy0 *
                                      F.pow_scalar(y0, -1.0), axis, True))
Example #4
def f_layer_normalization(inp, beta, gamma):
    use_axis = [x for x in range(1, inp.ndim)]
    inp = F.sub2(inp, F.mean(inp, axis=use_axis, keepdims=True))
    inp = F.div2(
        inp,
        F.pow_scalar(
            F.mean(F.pow_scalar(inp, 2), axis=use_axis, keepdims=True), 0.5))
    return inp * F.broadcast(gamma, inp.shape) + F.broadcast(beta, inp.shape)
Example #5
def distance(u, v, eps=1e-5):
    uu = F.sum(F.pow_scalar(u, 2), axis=1)
    vv = F.sum(F.pow_scalar(v, 2), axis=1)
    euclid_norm_pow2 = F.sum(F.pow_scalar(u - v, 2), axis=1)
    alpha = F.maximum2(F.constant(eps, shape=uu.shape), 1.0 - uu)
    beta = F.maximum2(F.constant(eps, shape=vv.shape), 1.0 - vv)

    return F.acosh(1 + 2 * euclid_norm_pow2 / (alpha * beta))
Example #6
def sigmas_regularization(ctx, log_var0, log_var1):
    with nn.context_scope(ctx):
        h0 = F.exp(log_var0)
        h0 = F.pow_scalar(h0, 0.5)
        h1 = F.exp(log_var1)
        h1 = F.pow_scalar(h1, 0.5)
        r = F.mean(F.squared_error(h0, h1))
    return r
Example #8
def minibatch_stddev(x, eps=1e-8):
    b, _, h, w = x.shape
    mean = F.mean(x, axis=0, keepdims=True)
    std = F.pow_scalar(
        F.mean(F.pow_scalar(F.sub2(x, F.broadcast(mean, x.shape)), 2.),
               axis=0,
               keepdims=True) + eps, 0.5)
    std_channel = F.broadcast(F.mean(std, keepdims=True), (b, 1, h, w))
    x = F.concatenate(x, std_channel, axis=1)
    return x
Example #9
def sr_loss_with_uncertainty(ctx, pred0, pred1, log_var0, log_var1):
    var0 = F.exp(log_var0)
    var1 = F.exp(log_var1)
    s0 = F.pow_scalar(var0, 0.5)
    s1 = F.pow_scalar(var1, 0.5)
    squared_error = F.squared_error(pred0, pred1)
    with nn.context_scope(ctx):
        loss = F.log(s1/s0) + (var0/var1 + squared_error/var1) * 0.5
        loss_sr = F.mean(loss)
    return loss_sr
Example #10
def lsgan_loss(d_fake, d_real=None, persistent=True):
    if d_real:  # Discriminator loss
        loss_d_real = F.mean(F.pow_scalar(d_real - 1., 2.))
        loss_d_fake = F.mean(F.pow_scalar(d_fake, 2.))
        loss = (loss_d_real + loss_d_fake) * 0.5
        loss.persistent = persistent
        return loss
    else:  # Generator loss, this form leads to minimization
        loss = F.mean(F.pow_scalar(d_fake - 1., 2.))
        loss.persistent = persistent
        return loss
Example #11
    def compute_mel(self, wave):
        hp = self.hparams
        reals, imags = F.stft(wave,
                              window_size=hp.win_length,
                              stride=hp.hop_length,
                              fft_size=hp.n_fft)
        linear = F.pow_scalar(
            F.add2(F.pow_scalar(reals, 2), F.pow_scalar(imags, 2)), 0.5)
        mels = F.batch_matmul(self.basis, linear)
        mels = F.log(F.clip_by_value(mels, 1e-5,
                                     np.inf)).apply(need_grad=False)
        return mels
Example #12
    def __call__(self, gen_rgb_out):

        out = conv_layer(gen_rgb_out, inmaps=3,
                         outmaps=self.channels[0], kernel_size=1, name_scope='Discriminator/Convinitial')

        inmaps = self.channels[0]
        for i in range(1, len(self.resolutions)):
            res = out.shape[2]
            outmaps = self.channels[i]
            out = res_block(out, res=res, outmaps=outmaps, inmaps=inmaps)
            inmaps = outmaps

        N, C, H, W = out.shape
        group = min(N, self.stddev_group)
        stddev_mean = F.reshape(
            out, (group, -1, self.stddev_feat, C // self.stddev_feat, H, W), inplace=False)

        # mean = F.mean(stddev_mean, axis=0, keepdims=True)
        mean = F.mul_scalar(F.sum(stddev_mean, axis=0, keepdims=True),
                            1.0/stddev_mean.shape[0], inplace=False)

        stddev_mean = F.mean(F.pow_scalar(F.sub2(stddev_mean, F.broadcast(
            mean, stddev_mean.shape)), 2.), axis=0, keepdims=False)
        stddev_mean = F.pow_scalar(F.add_scalar(
            stddev_mean, 1e-8, inplace=False), 0.5, inplace=False)

        stddev_mean = F.mean(stddev_mean, axis=[2, 3, 4], keepdims=True)
        stddev_mean = F.reshape(
            stddev_mean, stddev_mean.shape[:2]+stddev_mean.shape[3:], inplace=False)

        out = F.concatenate(out, F.tile(stddev_mean, (group, 1, H, W)), axis=1)

        out = conv_layer(out, inmaps=out.shape[1], outmaps=self.channels[-1],
                         kernel_size=3, name_scope='Discriminator/Convfinal')

        out = F.reshape(out, (N, -1), inplace=False)

        # Linear Layers
        lrmul = 1
        scale = 1/(out.shape[1]**0.5)*lrmul
        W, bias = weight_init_fn(
            (out.shape[-1], self.channels[-1]), weight_var='Discriminator/final_linear_1/affine')
        out = F.affine(out, W*scale, bias*lrmul)
        out = F.mul_scalar(F.leaky_relu(
            out, alpha=0.2, inplace=False), np.sqrt(2), inplace=False)

        scale = 1/(out.shape[1]**0.5)*lrmul
        W, bias = weight_init_fn(
            (out.shape[-1], 1), weight_var='Discriminator/final_linear_2/affine')
        out = F.affine(out, W*scale, bias*lrmul)

        return out
Example #13
def sigma_regularization(ctx, log_var, one):
    with nn.context_scope(ctx):
        h = F.exp(log_var)
        h = F.pow_scalar(h, 0.5)
        h = F.mean(h, axis=1)
        r = F.mean(F.squared_error(h, one))
    return r
Example #15
def sample_noise(inpt_size, out_size):
    _f = lambda x: F.sign(x) * F.pow_scalar(F.abs(x), 0.5)
    noise = _f(F.randn(shape=(inpt_size + out_size, )))
    eps_w = F.batch_matmul(F.reshape(noise[:inpt_size], (1, -1)),
                           F.reshape(noise[inpt_size:], (1, -1)), True)
    eps_b = noise[inpt_size:]
    return eps_w, eps_b
Example #16
def norm_backward(inputs, p=None, axes=None, keep_dims=False):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]

    if p is None:
        p = 2.0
    axes = list(range(x0.ndim)) if axes is None else force_list(axes)

    x_abs = F.abs(x0)
    x_pow = F.pow_scalar(x_abs, p)
    x_sum = F.sum(x_pow, axes, keepdims=True)

    # Add axis for mul2
    if not keep_dims:
        shape = list(x0.shape)
        for a in axes:
            shape[a] = 1
        dy = dy.reshape(shape)

    x_sign = no_grad(F.sign(x0))
    dx = dy * x_sum**(1. / p - 1.) * x_abs**(p - 1.) * x_sign

    return dx
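
The snippet relies on `force_list` and `no_grad` from nnabla's backward-function utilities. A sketch with simplified stand-ins for those helpers (assumptions, not the library definitions) and illustrative shapes:

import numpy as np
import nnabla as nn
import nnabla.functions as F

def force_list(x):
    # Stand-in: wrap a scalar axis spec into a list.
    return list(x) if isinstance(x, (list, tuple)) else [x]

def no_grad(v):
    # Stand-in for forward evaluation: treat v as a constant w.r.t. differentiation.
    return v.apply(need_grad=False)

x0 = nn.Variable.from_numpy_array(np.random.randn(3, 5))
dy = nn.Variable.from_numpy_array(np.ones(3))  # grad w.r.t. the L2 norm taken over axis 1
dx = norm_backward([dy, x0], p=2.0, axes=[1])
dx.forward()
print(dx.d.shape)  # -> (3, 5)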
Example #17
def ce_loss_with_uncertainty(ctx, pred, y_l, log_var):
    r = F.randn(0., 1., log_var.shape)
    r = F.pow_scalar(F.exp(log_var), 0.5) * r
    h = pred + r
    with nn.context_scope(ctx):
        loss_ce = F.mean(F.softmax_cross_entropy(h, y_l))
    return loss_ce
Example #18
def sigma_regularization(ctx, log_var, one):
    with nn.context_scope(ctx):
        h = F.exp(log_var)
        h = F.pow_scalar(h, 0.5)
        b = log_var.shape[0]
        r = F.sum(F.squared_error(h, one)) / b
    return r
Example #19
def kl_divergence(ctx, pred, label, log_var):
    with nn.context_scope(ctx):
        s = F.pow_scalar(F.exp(log_var), 0.5)
        elms = softmax_with_temperature(ctx, label, s) \
               * F.log(F.softmax(pred, axis=1))
        loss = -F.mean(F.sum(elms, axis=1))
    return loss
Example #23
def norm_normalization_backward(inputs, p=None, axes=None, eps=1e-12):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]

    if p is None:
        p = 2.0
    axes = list(range(x0.ndim)) if axes is None else force_list(axes)

    x_abs = F.abs(x0)
    x_pow = F.pow_scalar(x_abs, p)
    x_sum = F.sum(x_pow, axes, keepdims=True)
    # x_norm = x_sum ** (1./p)

    # Div2 backward
    dx = dy * x_sum**(-1. / p)
    dx_norm = -dy * x0 * x_sum**(-2. / p)
    dx_norm = sum_for_arithmetics(dx_norm, x_sum)

    # Norm backward
    x_sign = no_grad(F.sign(x0))
    dx += dx_norm * x_sum**(1. / p - 1.) * x_abs**(p - 1.) * x_sign

    return dx
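
Same idea for the normalization variant: `force_list`, `no_grad`, and `sum_for_arithmetics` come from nnabla's backward-function utilities, so the sketch below substitutes simplified stand-ins (assumptions made only to keep the sketch self-contained); shapes are illustrative.

import numpy as np
import nnabla as nn
import nnabla.functions as F

def force_list(x):
    return list(x) if isinstance(x, (list, tuple)) else [x]

def no_grad(v):
    # Stand-in for forward evaluation only.
    return v.apply(need_grad=False)

def sum_for_arithmetics(x, ref):
    # Stand-in: undo broadcasting by summing x over the axes where ref has size 1.
    axes = [i for i, (a, b) in enumerate(zip(x.shape, ref.shape)) if a != b]
    return F.sum(x, axes, keepdims=True) if axes else x

x0 = nn.Variable.from_numpy_array(np.random.randn(3, 5))
dy = nn.Variable.from_numpy_array(np.random.randn(3, 5))  # grad w.r.t. the normalized output
dx = norm_normalization_backward([dy, x0], p=2.0, axes=[1])
dx.forward()
print(dx.d.shape)  # -> (3, 5)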
Example #24
def lsgan_loss(real, weight, fake=None):
    if fake:
        loss = weight * F.mean(F.squared_error(F.constant(1, real.shape), real)
                               + F.pow_scalar(fake, 2))
    else:
        loss = weight * \
            F.mean(F.squared_error(F.constant(1, real.shape), real))
    return loss
Example #25
def lsgan_loss(feat, target_is_real=True, persistent=True):
    if target_is_real:
        label = F.constant(1, shape=feat.shape)
    else:
        label = F.constant(0, shape=feat.shape)
    loss = F.mean(F.pow_scalar(feat - label, 2.0))
    loss.persistent = persistent
    return loss
Example #26
    def regularize_noise(self, noises):
        loss = 0
        for noise in noises:
            size = noise.shape[2]
            while True:
                loss = (loss + F.pow_scalar(
                    F.mean(noise * F.shift(
                        noise, shifts=(0, 0, 0, 1), border_mode='reflect')), 2)
                    + F.pow_scalar(
                        F.mean(noise * F.shift(noise,
                                               shifts=(0, 0, 1, 0),
                                               border_mode='reflect')), 2))
                if size <= 8:
                    break
                noise = F.reshape(noise, [-1, 1, size // 2, 2, size // 2, 2])
                noise = F.mean(noise, [3, 5])
                size //= 2
        return loss
Example #27
def spectral_normalization_for_affine(w,
                                      itr=1,
                                      eps=1e-12,
                                      input_axis=1,
                                      test=False):
    W_sn = get_parameter_or_create("W_sn", w.shape, ConstantInitializer(0),
                                   False)
    if test:
        return W_sn

    d0 = np.prod(w.shape[0:-1])  # In
    d1 = np.prod(w.shape[-1])  # Out
    u0 = get_parameter_or_create("singular-vector", [d1], NormalInitializer(),
                                 False)
    u = F.reshape(u0, [d1, 1])
    # Power method
    for _ in range(itr):
        # v
        v = F.affine(w, u)
        v = F.div2(
            v,
            F.pow_scalar(F.sum(F.pow_scalar(v, 2.), keepdims=True) + eps, 0.5))
        v = F.reshape(v, [1, d0])
        # u
        u = F.affine(v, w)
        u = F.div2(
            u,
            F.pow_scalar(F.sum(F.pow_scalar(u, 2.), keepdims=True) + eps, 0.5))
        u = F.reshape(u, [d1, 1])
    # Iterate
    u = F.identity(u, outputs=[u0.data])
    u.persistent = True
    # No grad
    u.need_grad = False
    v.need_grad = False
    # Spectral normalization
    wv = F.affine(v, w)
    sigma = F.affine(wv, u)
    sigma = F.broadcast(F.reshape(sigma, [1 for _ in range(len(w.shape))]),
                        w.shape)
    w_sn = F.div2(w, sigma, outputs=[W_sn.data])
    w_sn.persistent = True
    return w_sn
Example #28
def gen_path_regularize(fake_img,
                        latents,
                        mean_path_length,
                        decay=0.01,
                        pl_weight=2.0):

    noise = F.randn(shape=fake_img.shape) / \
                    np.sqrt(fake_img.shape[2]*fake_img.shape[3])

    gradient = nn.grad([F.sum(fake_img * noise)], [latents])[0]
    path_lengths = F.mean(F.sum(F.pow_scalar(gradient, 2), axis=1), axis=0)
    path_lengths = F.pow_scalar(path_lengths, 0.5)

    path_mean = mean_path_length + decay * \
        (F.mean(path_lengths) - mean_path_length)

    path_penalty = F.mean(
        F.pow_scalar(path_lengths - F.reshape(path_mean, (1, ), inplace=False),
                     2))
    return path_penalty * pl_weight, path_mean, path_lengths
Example #29
def spectral_normalization_for_conv(w, itr=1, eps=1e-12, test=False):
    w_shape = w.shape
    W_sn = get_parameter_or_create("W_sn", w_shape, ConstantInitializer(0),
                                   False)
    if test:
        return W_sn

    d0 = w.shape[0]  # Out
    d1 = np.prod(w.shape[1:])  # In
    w = F.reshape(w, [d0, d1], inplace=False)
    u0 = get_parameter_or_create("singular-vector", [d0], NormalInitializer(),
                                 False)
    u = F.reshape(u0, [1, d0])
    # Power method
    for _ in range(itr):
        # v
        v = F.affine(u, w)
        v = F.div2(
            v,
            F.pow_scalar(F.sum(F.pow_scalar(v, 2.), keepdims=True) + eps, 0.5))
        v = F.reshape(v, [d1, 1])
        # u
        u = F.affine(w, v)
        u = F.div2(
            u,
            F.pow_scalar(F.sum(F.pow_scalar(u, 2.), keepdims=True) + eps, 0.5))
        u = F.reshape(u, [1, d0])
    # Iterate
    u = F.identity(u, outputs=[u0.data])
    u.persistent = True
    # No grad
    u.need_grad = False
    v.need_grad = False
    # Spectral normalization
    wv = F.affine(w, v)
    sigma = F.affine(u, wv)
    w_sn = F.div2(w, sigma)
    w_sn = F.reshape(w_sn, w_shape)
    w_sn = F.identity(w_sn, outputs=[W_sn.data])
    w_sn.persistent = True
    return w_sn
Example #30
    def __init__(self, waveglow, hp):
        mel_input = F.constant(shape=[1, hp.n_mels, 88])
        wave = waveglow.infer(mel_input, sigma=0)
        real, imag = F.stft(wave,
                            window_size=hp.win_length,
                            stride=hp.hop_length,
                            fft_size=hp.n_fft)
        bias_spec = F.pow_scalar(real**2 + imag**2, 0.5)
        bias_spec.forward(clear_buffer=True)

        self.bias_spec = bias_spec.d.copy()[:, :, 0][0, :, None]
        self.hparams = hp
Example #31
    def backward_impl(self, inputs, outputs, prop_down, accum):
        # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
        # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

        # Inputs
        x0 = inputs[0].data
        x1 = inputs[1].data
        dy = inputs[2].data
        # Outputs
        dx0 = outputs[0].data
        dx1 = outputs[1].data
        # Grads of inputs
        g_x0 = inputs[0].grad
        g_x1 = inputs[1].grad
        g_dy = inputs[2].grad
        # Grads of outputs
        g_dx0 = outputs[0].grad
        g_dx1 = outputs[1].grad

        # Computation
        x1_inv_square = F.pow_scalar(x1, -2.0)
        if prop_down[0]:
            if accum[0]:
                g_x0 -= g_dx1 * dy * x1_inv_square
            else:
                g_x0.copy_from(-g_dx1 * dy * x1_inv_square)
        if prop_down[1]:
            if accum[1]:
                g_x1 += dy * (g_dx1 * 2 * x0 * F.pow_scalar(x1, -3.0) -
                              g_dx0 * x1_inv_square)
            else:
                g_x1.copy_from(dy * (2 * g_dx1 * x0 * F.pow_scalar(x1, -3.0) -
                                     g_dx0 * x1_inv_square))
        if prop_down[2]:
            if accum[2]:
                g_dy += g_dx0 / x1 - g_dx1 * x0 * x1_inv_square
            else:
                g_dy.copy_from(g_dx0 / x1 - g_dx1 * x0 * x1_inv_square)
Example #32
def _focal_loss(pred, gt):
    '''Modified focal loss. Exactly the same as CornerNet.

    Made more numerically stable by using the log_sigmoid function.

      Arguments:
        pred (batch x c x h x w): logits (must be the values before the sigmoid activation)
        gt (batch x c x h x w): ground-truth heatmap
    '''
    alpha = 2
    beta = 4
    pos_inds = F.greater_equal_scalar(gt, 1)
    neg_inds = 1 - pos_inds
    neg_weights = F.pow_scalar(1.0 - gt, beta)
    prob_pred = F.sigmoid(pred)
    pos_loss = F.log_sigmoid(pred) * F.pow_scalar(1.0 - prob_pred,
                                                  alpha) * pos_inds
    pos_loss = F.sum(pos_loss)
    neg_loss = F.log_sigmoid(-pred) * F.pow_scalar(
        prob_pred, alpha) * neg_weights * neg_inds
    neg_loss = F.sum(neg_loss)
    num_pos = F.maximum_scalar(F.sum(pos_inds), 1)
    loss = -(1 / num_pos) * (pos_loss + neg_loss)
    return loss
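
A quick way to exercise the loss, assuming nnabla is available; the batch size, class count, and heatmap size are arbitrary, and the random `gt` merely stands in for a center-point heatmap in [0, 1].

import numpy as np
import nnabla as nn
import nnabla.functions as F

pred = nn.Variable.from_numpy_array(np.random.randn(2, 3, 4, 4))  # raw logits
gt = nn.Variable.from_numpy_array(np.random.rand(2, 3, 4, 4))     # heatmap targets
loss = _focal_loss(pred, gt)
loss.forward()
print(loss.d)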
Example #33
def spectrogram(wave, window_size):
    """Computes the spectrogram from the waveform.

    Args:
        wave (nn.Variable): Input waveform of shape (B, 1, L).
        window_size (int): Window size.

    Returns:
        nn.Variable: The magnitude spectrogram.
    """
    re, im = stft(wave,
                  window_size=window_size,
                  stride=window_size // 4,
                  fft_size=window_size)
    return F.pow_scalar(re**2 + im**2, 0.5)
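
`stft` here is presumably a thin wrapper around `nnabla.functions.stft`, which takes a (batch, samples) waveform; the sketch below writes the same magnitude computation directly against that function, with an assumed 512-sample window and a dummy 16000-sample signal.

import numpy as np
import nnabla as nn
import nnabla.functions as F

window_size = 512
wave = nn.Variable.from_numpy_array(
    np.random.randn(1, 16000).astype(np.float32))  # (batch, samples)
re, im = F.stft(wave, window_size=window_size,
                stride=window_size // 4, fft_size=window_size)
mag = F.pow_scalar(re ** 2 + im ** 2, 0.5)  # sqrt(re^2 + im^2), as above
mag.forward()
print(mag.shape)  # (batch, fft_size // 2 + 1, frames)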
Example #34
def IN(inp, axes=[1], decay_rate=0.9, eps=1e-5, fix_parameters=True):
    """Instance Normalization
    """
    if inp.shape[0] == 1:
        return INByBatchNorm(inp, axes, decay_rate, eps, fix_parameters)

    b, c = inp.shape[0:2]
    spatial_shape = inp.shape[2:]

    shape_stat = [1 for _ in inp.shape]
    shape_stat[axes[0]] = inp.shape[axes[0]]
    beta = get_parameter_or_create("beta", shape_stat, ConstantInitializer(0),
                                   not fix_parameters)
    gamma = get_parameter_or_create("gamma", shape_stat,
                                    ConstantInitializer(1), not fix_parameters)

    # Instance normalization: normalize over the spatial dimensions
    axis = [i for i in range(len(inp.shape)) if i > 1]
    n_spatial = np.prod(spatial_shape)
    mean = F.sum(inp, axis=axis, keepdims=True) / n_spatial
    var = F.sum(F.pow_scalar(inp - mean, 2.0), axis=axis, keepdims=True) / n_spatial
    h = (inp - mean) / F.pow_scalar(var + eps, 0.5)
    return gamma * h + beta
Example #35
def gradient_clipping(params, max_norm, norm_type=2):
    params = list(filter(lambda p: p.need_grad == True, params))
    norm_type = float(norm_type)

    if norm_type == float('inf'):
        total_norm = max(np.abs(p.g).max() for p in params)
    else:
        total_norm = 0.
        for p in params:
            param_norm = F.pow_scalar(F.sum(p.grad**norm_type), 1. / norm_type)
            total_norm += param_norm**norm_type
        total_norm = total_norm**(1. / norm_type)
    clip_coeff = max_norm / (float(total_norm.data) + 1e-6)
    if clip_coeff < 1:
        for p in params:
            p.g = p.g * clip_coeff
Example #36
    def __pow__(self, other):
        """
        Element-wise power function.
        Implements the power operator expression ``A ** B``, together with :func:`~nnabla.variable.__rpow__` .
        When a scalar is specified for ``other``, this function performs an
        element-wise operation for all elements in ``self``.

        Args:
            other (float or ~nnabla.Variable): Internally calling
                :func:`~nnabla.functions.pow2` or
                :func:`~nnabla.functions.pow_scalar` according to the
                type.

        Returns: :class:`nnabla.Variable`

        """
        import nnabla.functions as F
        if isinstance(other, Variable):
            return F.pow2(self, other)
        return F.pow_scalar(self, other)
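
A small illustration of the dispatch described in the docstring, assuming nnabla is importable; the values are arbitrary.

import numpy as np
import nnabla as nn

a = nn.Variable.from_numpy_array(np.array([1.0, 2.0, 3.0]))
b = nn.Variable.from_numpy_array(np.array([2.0, 2.0, 2.0]))

y_scalar = a ** 2.0  # scalar exponent -> F.pow_scalar
y_elem = a ** b      # Variable exponent -> F.pow2
y_scalar.forward()
y_elem.forward()
print(y_scalar.d, y_elem.d)  # both print [1. 4. 9.]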
Example #37
def sigma_regularization(ctx, log_var, one):
    with nn.context_scope(ctx):
        h = F.exp(log_var)
        h = F.pow_scalar(h, 0.5)
        r = F.mean(F.abs(h - one))
    return r