Example #1
File: test_graph.py  Project: zwsong/nnabla
def test_graph_logreg(seed):
    rng = np.random.RandomState(seed)
    x = nn.Variable([2, 3, 4], need_grad=True)
    w = nn.Variable([12, 5], need_grad=True)
    b = nn.Variable([5], need_grad=True)
    t = nn.Variable([2, 1])
    x.d = rng.randn(*x.shape)
    w.d = rng.randn(*w.shape)
    b.d = rng.randn(*b.shape)
    t.d = rng.randint(0, 5, size=t.shape)

    nn.set_default_context(nn.Context())

    # Forward prop by definition
    with nn.auto_forward():
        z = F.affine(x, w, b, 1)
        l = F.softmax_cross_entropy(z, t, 1)
        L = F.mean(l)

    # Backprop
    # Diff should be initialized since they are always accumulated
    x.g = 0
    w.g = 0
    b.g = 0
    L.backward(clear_buffer=True)
    x.g = rng.randn(*x.shape)

    inputs = [x, w, b]

    from nbla_test_utils import \
        compute_analytical_and_numerical_grad_graph as grads
    agrad, ngrad = grads(L, inputs, 1e-3)
    assert np.allclose(ngrad, agrad, atol=1e-2)
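For reference, the shapes in this test line up because F.affine with base_axis=1 flattens every dimension after the batch axis before the matrix product. A minimal sketch of just that flattening behavior (assumes only that nnabla and NumPy are installed):

import numpy as np
import nnabla as nn
import nnabla.functions as F

# x: (2, 3, 4) is flattened to (2, 12) and multiplied by w: (12, 5)
x = nn.Variable([2, 3, 4])
w = nn.Variable([12, 5])
b = nn.Variable([5])
x.d = np.random.randn(*x.shape)
w.d = np.random.randn(*w.shape)
b.d = np.random.randn(*b.shape)
with nn.auto_forward():
    z = F.affine(x, w, b, 1)
print(z.shape)  # (2, 5)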
Example #2
File: cnn_model_025.py  Project: kzky/works
def attention(k, q, v, div_dim=True, softmax=True):
    v_shape = v.shape
    k = F.identity(k)
    q = F.identity(q)
    k = F.reshape(k, (k.shape[0], np.prod(k.shape[1:])))
    q = F.reshape(q, (q.shape[0], np.prod(q.shape[1:])))
    v = q  # F.reshape above is in-place, so reuse the flattened q as v
    cf = F.affine(q, F.transpose(k, (1, 0)))
    if div_dim:
        dim = np.prod(v_shape[1:])
        cf /= np.sqrt(dim)
    h = cf
    if softmax: 
        h = F.softmax(h)
    h = F.affine(h, v)
    h = F.reshape(h, v_shape)
    return h
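A hedged usage sketch for the helper above (assumes only nnabla and NumPy; the shapes are illustrative): after flattening, F.affine(q, F.transpose(k, (1, 0))) computes the q·kᵀ correlation matrix, and the second F.affine applies it to the flattened values before reshaping back.

import numpy as np
import nnabla as nn
import nnabla.functions as F

with nn.auto_forward():
    k = nn.Variable.from_numpy_array(np.random.randn(4, 8, 8))
    q = nn.Variable.from_numpy_array(np.random.randn(4, 8, 8))
    v = nn.Variable.from_numpy_array(np.random.randn(4, 8, 8))
    h = attention(k, q, v)
print(h.shape)  # (4, 8, 8), i.e. v_shape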
Example #3
    def connect(self, fname, inputs, args):
        if fname in ['Convolution', 'Deconvolution']:
            # TODO: address leading batch dimension
            args['channel_last'] = True
            x = inputs[0]
            w = inputs[1]
            b = inputs[2] if len(inputs) == 3 else None
            scope = self.get_parameter_scope(w)
            with nn.parameter_scope(scope):
                wd = w.d.copy().transpose(0, 2, 3, 1)
                w = nn.parameter.get_parameter_or_create('W_cl', wd.shape, wd)
            o = F.convolution(x, w, b, **args)
        elif fname == 'BatchNormalization':
            # TODO: address leading batch dimension
            x = inputs[0]
            beta = inputs[1]
            gamma = inputs[2]
            mean = inputs[3]
            var = inputs[4]
            args['axes'] = [len(x.shape) - 1]
            scope = self.get_parameter_scope(beta)
            with nn.parameter_scope(scope):
                beta_d = beta.d.copy().transpose(0, 2, 3, 1)
                gamma_d = gamma.d.copy().transpose(0, 2, 3, 1)
                mean_d = mean.d.copy().transpose(0, 2, 3, 1)
                var_d = var.d.copy().transpose(0, 2, 3, 1)
                beta = nn.parameter.get_parameter_or_create(
                    'beta_cl', beta_d.shape, beta_d, beta.need_grad)
                gamma = nn.parameter.get_parameter_or_create(
                    'gamma_cl', gamma_d.shape, gamma_d, gamma.need_grad)
                mean = nn.parameter.get_parameter_or_create(
                    'mean_cl', mean_d.shape, mean_d, mean.need_grad)
                var = nn.parameter.get_parameter_or_create(
                    'var_cl', var_d.shape, var_d, var.need_grad)
            o = F.batch_normalization(x, beta, gamma, mean, var, **args)
        elif fname in ['MaxPooling', 'AveragePooling', 'SumPooling']:
            args['channel_last'] = True
            o = self._call_function(fname, inputs, args)
        elif fname in ['Concatenate']:
            args['axis'] = len(inputs[0].shape) - 1
            o = self._call_function(fname, inputs, args)
        elif fname == 'Affine':
            x = inputs[0]

            _, h_s, w_s, c_s = inputs[0].shape
            _, b_s = inputs[1].shape
            wd = inputs[1].d.copy()
            wd = np.reshape(wd, (c_s, h_s, w_s, b_s))
            wd = np.transpose(wd, (1, 2, 0, 3))
            wd = np.reshape(wd, (-1, b_s))
            w = nn.parameter.get_parameter_or_create('w_cl', wd.shape, wd,
                                                     False)

            b = inputs[2] if len(inputs) == 3 else None
            o = F.affine(x, w, b, **args)
        else:
            o = self._call_function(fname, inputs, args)
        return o
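The 'Affine' branch above re-orders a weight that was stored for NCHW-flattened input so that it matches NHWC-flattened input. A NumPy-only illustration of that permutation (the shapes here are made up):

import numpy as np

c_s, h_s, w_s, b_s = 3, 4, 4, 10                # channels, height, width, outputs
wd = np.random.randn(c_s * h_s * w_s, b_s)      # weight laid out for (c, h, w) flattening
wd = np.reshape(wd, (c_s, h_s, w_s, b_s))
wd = np.transpose(wd, (1, 2, 0, 3))             # -> (h, w, c, outputs)
wd = np.reshape(wd, (-1, b_s))                  # weight laid out for (h, w, c) flattening
print(wd.shape)                                 # (48, 10)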
Example #4
def spectral_normalization_for_affine(w,
                                      itr=1,
                                      eps=1e-12,
                                      input_axis=1,
                                      test=False):
    W_sn = get_parameter_or_create("W_sn", w.shape, ConstantInitializer(0),
                                   False)
    if test:
        return W_sn

    d0 = np.prod(w.shape[0:-1])  # In
    d1 = np.prod(w.shape[-1])  # Out
    u0 = get_parameter_or_create("singular-vector", [d1], NormalInitializer(),
                                 False)
    u = F.reshape(u0, [d1, 1])
    # Power method
    for _ in range(itr):
        # v
        v = F.affine(w, u)
        v = F.div2(
            v,
            F.pow_scalar(F.sum(F.pow_scalar(v, 2.), keepdims=True) + eps, 0.5))
        v = F.reshape(v, [1, d0])
        # u
        u = F.affine(v, w)
        u = F.div2(
            u,
            F.pow_scalar(F.sum(F.pow_scalar(u, 2.), keepdims=True) + eps, 0.5))
        u = F.reshape(u, [d1, 1])
    # Iterate
    u = F.identity(u, outputs=[u0.data])
    u.persistent = True
    # No grad
    u.need_grad = False
    v.need_grad = False
    # Spectral normalization
    wv = F.affine(v, w)
    sigma = F.affine(wv, u)
    sigma = F.broadcast(F.reshape(sigma, [1 for _ in range(len(w.shape))]),
                        w.shape)
    w_sn = F.div2(w, sigma, outputs=[W_sn.data])
    w_sn.persistent = True
    return w_sn
Example #5
def spectral_normalization_for_conv(w, itr=1, eps=1e-12, test=False):
    w_shape = w.shape
    W_sn = get_parameter_or_create("W_sn", w_shape, ConstantInitializer(0),
                                   False)
    if test:
        return W_sn

    d0 = w.shape[0]  # Out
    d1 = np.prod(w.shape[1:])  # In
    w = F.reshape(w, [d0, d1], inplace=False)
    u0 = get_parameter_or_create("singular-vector", [d0], NormalInitializer(),
                                 False)
    u = F.reshape(u0, [1, d0])
    # Power method
    for _ in range(itr):
        # v
        v = F.affine(u, w)
        v = F.div2(
            v,
            F.pow_scalar(F.sum(F.pow_scalar(v, 2.), keepdims=True) + eps, 0.5))
        v = F.reshape(v, [d1, 1])
        # u
        u = F.affine(w, v)
        u = F.div2(
            u,
            F.pow_scalar(F.sum(F.pow_scalar(u, 2.), keepdims=True) + eps, 0.5))
        u = F.reshape(u, [1, d0])
    # Iterate
    u = F.identity(u, outputs=[u0.data])
    u.persistent = True
    # No grad
    u.need_grad = False
    v.need_grad = False
    # Spectral normalization
    wv = F.affine(w, v)
    sigma = F.affine(u, wv)
    w_sn = F.div2(w, sigma)
    w_sn = F.reshape(w_sn, w_shape)
    w_sn = F.identity(w_sn, outputs=[W_sn.data])
    w_sn.persistent = True
    return w_sn
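Both spectral-normalization examples above use the same power iteration to estimate the largest singular value sigma of the flattened weight and then divide the weight by it. A NumPy-only sketch of that estimate (illustration, not nnabla code):

import numpy as np

def estimate_sigma(W, itr=1, eps=1e-12):
    # u and v converge to the leading left/right singular vectors of W
    u = np.random.randn(W.shape[0])
    for _ in range(itr):
        v = W.T.dot(u)
        v /= np.linalg.norm(v) + eps
        u = W.dot(v)
        u /= np.linalg.norm(u) + eps
    return u.dot(W).dot(v)  # approximates the spectral norm of W

W = np.random.randn(64, 128)
print(estimate_sigma(W, itr=10), np.linalg.svd(W, compute_uv=False)[0])  # should be close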
Example #6
def mapping_network(z, outmaps=512, num_layers=8, net_scope='G_mapping/Dense'):

    lrmul = 0.01
    runtime_coef = 0.00044194172  # = lrmul / sqrt(512): equalized learning-rate scaling

    out = z
    for i in range(num_layers):
        with nn.parameter_scope(f'{net_scope}{i}'):
            W, bias = weight_init_fn(shape=(out.shape[1], outmaps),
                                     lrmul=lrmul)
            out = F.affine(out, W * runtime_coef, bias * lrmul)
            out = F.mul_scalar(F.leaky_relu(out, alpha=0.2, inplace=False),
                               np.sqrt(2),
                               inplace=False)
    return out
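The two constants implement the StyleGAN-style equalized learning rate: the stored weight is over-scaled by 1/lrmul and multiplied at run time by runtime_coef = lrmul / sqrt(fan_in), which for fan_in = 512 and lrmul = 0.01 is about 0.00044194172. weight_init_fn is project-specific; the following is only a hypothetical NumPy sketch of the idea, not that project's code:

import numpy as np

def equalized_lr_params(fan_in, fan_out, lrmul=0.01):
    # hypothetical stand-in for weight_init_fn
    runtime_coef = lrmul / np.sqrt(fan_in)        # ~0.00044194172 for fan_in=512
    W = np.random.randn(fan_in, fan_out) / lrmul  # stored weight
    bias = np.zeros(fan_out)                      # stored bias, scaled by lrmul at run time
    return W, bias, runtime_coef

W, bias, coef = equalized_lr_params(512, 512)
print(np.std(W * coef))  # ~1/sqrt(512), i.e. roughly unit-variance pre-activations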
Example #7
def affine(inp, n_outmaps,
           base_axis=1,
           w_init=None, b_init=None,
           fix_parameters=False, rng=None, with_bias=True):
    """
    The affine layer, also known as the fully connected layer. Computes

    .. math::
        {\\mathbf y} = {\\mathbf A} {\\mathbf x} + {\\mathbf b}.

    where :math:`{\\mathbf x}, {\\mathbf y}` are the input and output respectively,
    and :math:`{\\mathbf A}, {\\mathbf b}` are the weight matrix and bias vector.

    Args:
        inp (~nnabla.Variable): Input N-D array with shape (:math:`M_0 \\times \\ldots \\times M_{B-1} \\times D_B \\times \\ldots \\times D_N`). Dimensions before and after `base_axis` are flattened as if the input were a matrix.
        n_outmaps (:obj:`int` or :obj:`tuple` of :obj:`int`): Number of output neurons per data.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        fix_parameters (bool): When set to `True`, the weights and biases will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`: :math:`(B + 1)`-D array. (:math:`M_0 \\times \\ldots \\times M_{B-1} \\times L`)

    """
    if not hasattr(n_outmaps, '__iter__'):
        n_outmaps = [n_outmaps]
    n_outmaps = list(n_outmaps)
    n_outmap = int(np.prod(n_outmaps))
    if w_init is None:
        inmaps = np.prod(inp.shape[base_axis:])
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(inmaps, n_outmap), rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps,
        w_init, not fix_parameters)
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", n_outmaps, b_init, not fix_parameters)
    return F.affine(inp, w, b, base_axis)
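This is the parametric-function wrapper that nnabla exposes as nnabla.parametric_functions.affine (PF.affine); a minimal usage sketch (assumes nnabla is installed; the scope name "fc1" is arbitrary):

import numpy as np
import nnabla as nn
import nnabla.parametric_functions as PF

x = nn.Variable([8, 3, 32, 32])   # (batch, C, H, W)
x.d = np.random.randn(*x.shape)
with nn.parameter_scope("fc1"):
    y = PF.affine(x, 100)         # creates W of shape (3*32*32, 100) and b of shape (100,)
y.forward()
print(y.shape)                    # (8, 100)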
Example #8
    def __call__(self, x, return_encoding_indices=False):

        x = F.transpose(x, (0, 2, 3, 1))
        x_flat = x.reshape((-1, self.embedding_dim))

        x_flat_squared = F.broadcast(F.sum(x_flat**2, axis=1, keepdims=True),
                                     (x_flat.shape[0], self.num_embedding))
        emb_wt_squared = F.transpose(
            F.sum(self.embedding_weight**2, axis=1, keepdims=True), (1, 0))

        distances = x_flat_squared + emb_wt_squared - 2 * \
            F.affine(x_flat, F.transpose(self.embedding_weight, (1, 0)))

        encoding_indices = F.min(distances,
                                 only_index=True,
                                 axis=1,
                                 keepdims=True)
        encoding_indices.need_grad = False

        quantized = F.embed(
            encoding_indices.reshape(encoding_indices.shape[:-1]),
            self.embedding_weight).reshape(x.shape)

        if return_encoding_indices:
            return encoding_indices, F.transpose(quantized, (0, 3, 1, 2))

        encodings = F.one_hot(encoding_indices, (self.num_embedding, ))

        e_latent_loss = F.mean(
            F.squared_error(quantized.get_unlinked_variable(need_grad=False),
                            x))
        q_latent_loss = F.mean(
            F.squared_error(quantized,
                            x.get_unlinked_variable(need_grad=False)))
        loss = q_latent_loss + self.commitment_cost * e_latent_loss

        quantized = x + (quantized - x).get_unlinked_variable(need_grad=False)

        avg_probs = F.mean(encodings, axis=0)
        perplexity = F.exp(-F.sum(avg_probs * F.log(avg_probs + 1.0e-10)))

        return loss, F.transpose(quantized,
                                 (0, 3, 1, 2)), perplexity, encodings
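The distance computation above relies on the expansion ||x - e||^2 = ||x||^2 + ||e||^2 - 2 x·e, with the cross term computed by F.affine(x_flat, F.transpose(self.embedding_weight, (1, 0))). A NumPy-only check of that identity (shapes are illustrative):

import numpy as np

x_flat = np.random.randn(6, 4)    # (num_vectors, embedding_dim)
emb = np.random.randn(10, 4)      # (num_embedding, embedding_dim)
d = (x_flat ** 2).sum(1, keepdims=True) + (emb ** 2).sum(1) - 2 * x_flat.dot(emb.T)
d_ref = ((x_flat[:, None, :] - emb[None, :, :]) ** 2).sum(-1)
print(np.allclose(d, d_ref))      # True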