Example #1
File: rnn.py Project: Pandinosaurus/nnabla
def _gru(x, h, w, b, with_bias):
    """GRU cell.
    Args:
        x (:obj:`~nnabla.Variable`): Input data.
        h (:obj:`~nnabla.Variable`): Hidden state.
        w (:obj:`~nnabla.Variable`): Weight.
        b (:obj:`~nnabla.Variable`): Bias.
        with_bias (bool): Include the bias or not.
    """
    hidden_size = h.shape[1]
    xh = F.concatenate(*(x, h), axis=1)
    w0, w1, w2 = F.split(w, axis=0)
    b0 = b1 = b2 = b3 = None
    if with_bias:
        b0, b1, b2, b3 = F.split(b, axis=0)
    r_t = F.sigmoid(F.affine(xh, F.transpose(w0, (1, 0)), b0))
    z_t = F.sigmoid(F.affine(xh, F.transpose(w1, (1, 0)), b1))

    w2_0 = w2[:, :w2.shape[1] - hidden_size]
    w2_1 = w2[:, w2.shape[1] - hidden_size:]
    n_t = F.tanh(
        F.affine(x, F.transpose(w2_0, (1, 0)), b2) +
        r_t * F.affine(h, F.transpose(w2_1, (1, 0)), b3))
    h_t = (1 - z_t) * n_t + z_t * h

    return h_t
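A minimal usage sketch (not part of the original example): the weight layout below, w of shape (3, hidden_size, input_size + hidden_size) and b of shape (4, hidden_size), is what the F.split calls above imply.

import numpy as np
import nnabla as nn

batch_size, input_size, hidden_size = 4, 8, 16
x = nn.Variable.from_numpy_array(np.random.randn(batch_size, input_size).astype(np.float32))
h = nn.Variable.from_numpy_array(np.zeros((batch_size, hidden_size), dtype=np.float32))
# w holds the reset/update/candidate weights; b holds four bias vectors (b2/b3 belong to the candidate gate).
w = nn.Variable.from_numpy_array(
    np.random.randn(3, hidden_size, input_size + hidden_size).astype(np.float32))
b = nn.Variable.from_numpy_array(np.zeros((4, hidden_size), dtype=np.float32))

h_t = _gru(x, h, w, b, with_bias=True)
h_t.forward()
print(h_t.shape)  # (4, 16)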
Example #2
def simple_rnn(inputs: nn.Variable, units: int, mask: Optional[nn.Variable] = None,
               return_sequences: bool = False, fix_parameters=False) -> nn.Variable:
    '''
    A vanilla recurrent neural network layer
    Args:
        inputs (nnabla.Variable): A shape of [batch_size, length, embedding_size].
        units (int): Dimensionality of the output space.
        mask (nnabla.Variable): A shape of [batch_size, length, 1].
        return_sequences (bool): Whether to return only the last output in the output sequence, or the full sequence.
        fix_parameters (bool): Fix parameters (Set need_grad=False).
    Returns:
        nn.Variable: A shape [batch_size, length, units]
        or
        nn.Variable: A shape [batch_size, units].
    '''

    hs = []
    batch_size, length, embedding_size = inputs.shape
    h0 = F.constant(0, shape=(batch_size, units))

    h = h0

    if mask is None:
        mask = F.constant(1, shape=(batch_size, length, 1))

    for x, cond in zip(F.split(inputs, axis=1), F.split(mask, axis=1)):
        h_t = F.tanh(PF.affine(F.concatenate(x, h, axis=1), units, fix_parameters=fix_parameters))
        h = where(cond, h_t, h)
        hs.append(h)

    if return_sequences:
        hs = F.stack(*hs, axis=1)
        return hs
    else:
        return hs[-1]
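A minimal usage sketch (not part of the original example; it assumes the where helper used above is available from the same module):

import numpy as np
import nnabla as nn

x = nn.Variable.from_numpy_array(np.random.randn(2, 5, 8).astype(np.float32))   # (batch_size, length, embedding_size)
mask = nn.Variable.from_numpy_array(np.ones((2, 5, 1), dtype=np.float32))       # 1 keeps a step, 0 carries the previous state
last = simple_rnn(x, units=16, mask=mask)                        # shape (2, 16)
seq = simple_rnn(x, units=16, mask=mask, return_sequences=True)  # shape (2, 5, 16)
last.forward()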
Example #3
def test_graph_more_than_2_outputs(seed, clear_buffer):
    count = 0

    def func_hook(f):
        nonlocal count
        if f.name == 'Split':
            count += 1

    nn.clear_parameters()

    a = nn.Variable.from_numpy_array(np.ones((10, )))
    b = nn.Variable.from_numpy_array(np.ones((10, )))
    c = F.add2(a, b, inplace=True, outputs=[a.data])
    y = F.split(c, axis=0)
    nn.forward_all(y, function_pre_hook=func_hook)

    assert count == 1

    res = [x.d for x in y]
    assert_allclose(res, [2.0] * 10)

    a = nn.Variable.from_numpy_array(np.ones((10, )))
    b = nn.Variable.from_numpy_array(np.ones((10, )))
    c = F.add2(a, b, inplace=True, outputs=[a.data])
    y = F.split(c, axis=0)
    for yy in y:
        yy.forward()
    res = [x.d for x in y]
    assert_allclose(res, [11.0] * 10)
Example #4
def get_loss(l1,
             l2,
             x,
             t,
             w_init,
             b_init,
             num_words,
             batch_size,
             state_size,
             dropout=False,
             dropout_rate=0.5,
             embed_name='embed',
             pred_name='pred'):
    e_list = [
        PF.embed(x_elm, num_words, state_size, name=embed_name)
        for x_elm in F.split(x, axis=1)
    ]
    t_list = F.split(t, axis=1)
    loss = 0
    for i, (e_t, t_t) in enumerate(zip(e_list, t_list)):
        if dropout:
            h1 = l1(F.dropout(e_t, dropout_rate), w_init, b_init)
            h2 = l2(F.dropout(h1, dropout_rate), w_init, b_init)
            y = PF.affine(F.dropout(h2, dropout_rate),
                          num_words,
                          name=pred_name)
        else:
            h1 = l1(e_t, w_init, b_init)
            h2 = l2(h1, w_init, b_init)
            y = PF.affine(h2, num_words, name=pred_name)
        t_t = F.reshape(t_t, [batch_size, 1])
        loss += F.mean(F.softmax_cross_entropy(y, t_t))
    loss /= float(i + 1)

    return loss
Example #5
def lstm(inputs: nn.Variable, units: int, mask: Optional[nn.Variable] = None, initial_state: Optional[Tuple[nn.Variable, nn.Variable]] = None,
         return_sequences: bool = False, return_state: bool = False, fix_parameters: bool = False) -> nn.Variable:
    '''
    A long short-term memory (LSTM) layer
    Args:
        inputs (nnabla.Variable): A shape of [batch_size, length, embedding_size].
        units (int): Dimensionality of the output space.
        mask (nnabla.Variable): A shape of [batch_size, length, 1].
        initial_state ([nnabla.Variable, nnabla.Variable]): A tuple of an initial cell and an initial hidden state.
        return_sequences (bool): Whether to return only the last output in the output sequence, or the full sequence.
        return_state (bool): Whether to return the last state, which consists of the cell state and the hidden state.
        fix_parameters (bool): Fix parameters (Set need_grad=False).
    Returns:
        nn.Variable: A shape [batch_size, length, units].
        or
        nn.Variable: A shape [batch_size, units].
    '''
    
    batch_size, length, embedding_size = inputs.shape

    if initial_state is None:
        c0 = F.constant(0, shape=(batch_size, units))
        h0 = F.constant(0, shape=(batch_size, units))
    else:
        assert type(initial_state) is tuple or type(initial_state) is list, \
               'initial_state must be a tuple or a list.'
        assert len(initial_state) == 2, \
               'initial_state must have exactly two states.'

        c0, h0 = initial_state

        assert c0.shape == h0.shape, 'shapes of initial_state must be the same.'
        assert c0.shape[0] == batch_size, \
               'batch size of initial_state ({0}) is different from that of inputs ({1}).'.format(c0.shape[0], batch_size)
        assert c0.shape[1] == units, \
               'units size of initial_state ({0}) is different from the units argument ({1}).'.format(c0.shape[1], units)

    cell = c0
    hidden = h0

    hs = []

    if mask is None:
        mask = F.constant(1, shape=(batch_size, length, 1))
    for x, cond in zip(F.split(inputs, axis=1), F.split(mask, axis=1)):
        cell_t, hidden_t = lstm_cell(x, cell, hidden)
        cell = where(cond, cell_t, cell)
        hidden = where(cond, hidden_t, hidden)
        hs.append(hidden)

    if return_sequences:
        ret = F.stack(*hs, axis=1)
    else:
        ret = hs[-1]

    if return_state:
        return ret, cell, hidden
    else:
        return ret
Example #6
def top_k_error(target_action,
                target_action_type,
                target_action_mask,
                rule_prob,
                terminal_gen_action_prob,
                token_prob,
                copy_prob,
                k=5):
    batch_size, max_action_length, _ = target_action.shape
    _, _, rule_num = rule_prob.shape
    _, _, token_num = token_prob.shape
    _, _, max_query_length = copy_prob.shape

    # (batch_size, max_action_length)
    rule_mask, token_mask, copy_mask = F.split(target_action_type, axis=2)

    # (batch_size, max_action_length)
    target_rule, target_token, target_copy = F.split(target_action, axis=2)
    target_rule = F.reshape(target_rule, (batch_size, max_action_length, 1))

    # (batch_size, max_action_length)
    gen_token_prob, copy_token_prob = F.split(terminal_gen_action_prob, axis=2)
    gen_token_prob = F.reshape(gen_token_prob,
                               (batch_size, max_action_length, 1))
    gen_token_prob = F.broadcast(gen_token_prob,
                                 (batch_size, max_action_length, token_num))
    copy_token_prob = F.reshape(copy_token_prob,
                                (batch_size, max_action_length, 1))
    copy_token_prob = F.broadcast(
        copy_token_prob, (batch_size, max_action_length, max_query_length))
    # (batch_size, max_action_length, token_num)
    token_prob = gen_token_prob * token_prob
    # (batch_size, max_action_length, max_query_length)
    copy_prob = copy_token_prob * copy_prob
    # (batch_size, max_action_length, token_num + max_query_length)
    gen_or_copy = F.concatenate(token_prob, copy_prob, axis=2)

    # (batch_size, max_action_length)
    token_label = token_mask * target_token + (copy_mask *
                                               (target_copy + token_num))
    token_label = F.reshape(token_label, (batch_size, max_action_length, 1))

    # (batch_size, max_action_length, 1)
    rule_err = F.top_n_error(rule_prob, target_rule, axis=2, n=k)
    rule_err = F.reshape(rule_err, (batch_size, max_action_length))
    # (batch_size, max_action_length, 1)
    token_err = F.top_n_error(gen_or_copy, token_label, axis=2, n=k)
    token_err = F.reshape(token_err, (batch_size, max_action_length))

    # (batch_size, max_action_length)
    err = rule_mask * rule_err + (token_mask + copy_mask) * token_err
    # (batch_size,)
    num = F.sum(rule_mask, axis=1) + F.sum(token_mask, axis=1) + F.sum(
        copy_mask, axis=1)
    # (batch_size,)
    err = F.sum(err, axis=1)
    # (batch_size,)
    err = err / (num + 1e-7)
    return F.mean(err)
Example #7
def TimeDistributedSoftmaxCrossEntropy(y_pred, y_true):
    '''
    A time-distributed softmax cross entropy
    Args:
        y_pred (nnabla.Variable): A shape of [B, SentenceLength, O]. # one-hot
        y_true (nnabla.Variable): A shape of [B, SentenceLength, 1]. # index
    Returns:
        nn.Variable: A shape [B, SentenceLength].
    '''
    ret = []
    for y_p, y_t in zip(F.split(y_pred, axis=1), F.split(y_true, axis=1)):
        ret.append(F.softmax_cross_entropy(y_p, y_t))
    return F.concatenate(*ret)
Example #8
def time_distributed_softmax_cross_entropy(y_pred: nn.Variable,
                                           y_true: nn.Variable) -> nn.Variable:
    '''
    A time-distributed softmax cross entropy
    Args:
        y_pred (nnabla.Variable): A shape of (batch_size, length, number_of_outputs). # one-hot
        y_true (nnabla.Variable): A shape of (batch_size, length, 1). # index
    Returns:
        nn.Variable: A shape (batch_size, length).
    '''
    ret = []
    for y_p, y_t in zip(F.split(y_pred, axis=1), F.split(y_true, axis=1)):
        ret.append(F.softmax_cross_entropy(y_p, y_t))
    return F.concatenate(*ret)
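A minimal usage sketch (shapes are illustrative, not from the original): two sequences of length 4 with 10 output classes yield a (2, 4) matrix of per-step losses.

import numpy as np
import nnabla as nn

y_pred = nn.Variable.from_numpy_array(np.random.randn(2, 4, 10).astype(np.float32))
y_true = nn.Variable.from_numpy_array(np.random.randint(0, 10, size=(2, 4, 1)).astype(np.float32))
loss = time_distributed_softmax_cross_entropy(y_pred, y_true)
loss.forward()
print(loss.shape)  # (2, 4)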
Example #9
def lab2rgb(input):
    L, a, b = F.split(input, axis=1)
    y = (L + 16.0) / 116.0
    x = (a / 500.0) + y
    z = y - (b / 200.0)
    neg_mask = F.less_scalar(z, 0).apply(need_grad=False)
    z = z * F.logical_not(neg_mask)
    mask_Y = F.greater_scalar(y, 0.2068966).apply(need_grad=False)
    mask_X = F.greater_scalar(x, 0.2068966).apply(need_grad=False)
    mask_Z = F.greater_scalar(z, 0.2068966).apply(need_grad=False)
    Y_1 = (y ** 3) * mask_Y
    Y_2 = L / (116. * 7.787) * F.logical_not(mask_Y)
    var_Y = Y_1 + Y_2

    X_1 = (x ** 3) * mask_X
    X_2 = (x - 16. / 116.) / 7.787 * F.logical_not(mask_X)
    var_X = X_1 + X_2

    Z_1 = (z ** 3) * mask_Z
    Z_2 = (z - 16. / 116.) / 7.787 * F.logical_not(mask_Z)
    var_Z = Z_1 + Z_2

    X = 0.95047 * var_X
    Y = 1.00000 * var_Y
    Z = 1.08883 * var_Z

    var_R = X * 3.2406 + Y * -1.5372 + Z * -0.4986
    var_G = X * -0.9689 + Y * 1.8758 + Z * 0.0415
    var_B = X * 0.0557 + Y * -0.2040 + Z * 1.0570

    mask_R = F.greater_scalar(var_R, 0.0031308).apply(need_grad=False)
    n_mask_R = F.logical_not(mask_R)
    R_1 = (1.055 * (F.maximum2(var_R, n_mask_R) ** (1 / 2.4)) - 0.055) * mask_R
    R_2 = (12.92 * var_R) * n_mask_R
    var_R = R_1 + R_2

    mask_G = F.greater_scalar(var_G, 0.0031308).apply(need_grad=False)
    n_mask_G = F.logical_not(mask_G)
    G_1 = (1.055 * (F.maximum2(var_G, n_mask_G) ** (1 / 2.4)) - 0.055) * mask_G
    G_2 = (12.92 * var_G) * n_mask_G
    var_G = G_1 + G_2

    mask_B = F.greater_scalar(var_B, 0.0031308).apply(need_grad=False)
    n_mask_B = F.logical_not(mask_B)
    B_1 = (1.055 * (F.maximum2(var_B, n_mask_B) ** (1 / 2.4)) - 0.055) * mask_B
    B_2 = (12.92 * var_B) * n_mask_B
    var_B = B_1 + B_2
    return F.stack(var_R, var_G, var_B, axis=1)
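A minimal usage sketch (not part of the original): the axis=1 split above implies a channels-first (batch, 3, height, width) Lab input, and the output keeps that layout in RGB. The random values here only check shapes, not realistic Lab ranges.

import numpy as np
import nnabla as nn

lab = nn.Variable.from_numpy_array(np.random.rand(1, 3, 8, 8).astype(np.float32))
rgb = lab2rgb(lab)
rgb.forward()
print(rgb.shape)  # (1, 3, 8, 8)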
Example #10
def lstm(x, h, c, w, b, with_bias):
    hidden_size = h.shape[1]
    xh = F.concatenate(*(x, h), axis=1)
    w0, w1, w2, w3 = F.split(w, axis=0)
    b0 = b1 = b2 = b3 = None
    if with_bias:
        b0, b1, b2, b3 = F.split(b, axis=0)
    i_t = F.affine(xh, F.transpose(w0, (1, 0)), b0)
    f_t = F.affine(xh, F.transpose(w1, (1, 0)), b1)
    g_t = F.affine(xh, F.transpose(w2, (1, 0)), b2)
    o_t = F.affine(xh, F.transpose(w3, (1, 0)), b3)
    c_t = F.sigmoid(f_t) * c + F.sigmoid(i_t) * F.tanh(g_t)
    h_t = F.sigmoid(o_t) * F.tanh(c_t)

    return h_t, c_t
Example #11
def loss(target_action, target_action_type, target_action_mask, rule_prob,
         terminal_gen_action_prob, token_prob, copy_prob):
    batch_size, max_action_length, _ = target_action.shape
    _, _, rule_num = rule_prob.shape
    _, _, token_num = token_prob.shape
    _, _, max_query_length = copy_prob.shape

    # (batch_size, max_action_length)
    target_rule, target_token, target_copy = F.split(target_action, axis=2)

    target_rule = F.reshape(target_rule, (batch_size, max_action_length, 1))
    target_rule = F.one_hot(
        target_rule, (rule_num, ))  # (batch_size, max_action_length, rule_num)
    rule_tgt_prob = rule_prob * target_rule  # (batch_size, max_action_length, rule_num)
    rule_tgt_prob = F.sum(rule_tgt_prob,
                          axis=2)  # (batch_size, max_action_length)

    target_token = F.reshape(target_token, (batch_size, max_action_length, 1))
    target_token = F.one_hot(
        target_token,
        (token_num, ))  # (batch_size, max_action_length, token_num)
    token_tgt_prob = token_prob * target_token  # (batch_size, max_action_length, token_num)
    token_tgt_prob = F.sum(token_tgt_prob,
                           axis=2)  # (batch_size, max_action_length)

    target_copy = F.reshape(target_copy, (batch_size, max_action_length, 1))
    target_copy = F.one_hot(
        target_copy, (max_query_length,
                      ))  # (batch_size, max_action_length, max_query_length)
    copy_tgt_prob = copy_prob * target_copy  # (batch_size, max_action_length, max_query_length)
    copy_tgt_prob = F.sum(copy_tgt_prob,
                          axis=2)  # (batch_size, max_action_length)

    # (batch_size, max_action_length)
    gen_token_prob, copy_token_prob = F.split(terminal_gen_action_prob, axis=2)
    # (batch_size, max_action_length)
    rule_mask, token_mask, copy_mask = F.split(target_action_type, axis=2)

    # (batch_size, max_action_length)
    target_prob = rule_mask * rule_tgt_prob + \
                  token_mask * gen_token_prob * token_tgt_prob + \
                  copy_mask * copy_token_prob * copy_tgt_prob
    # (batch_size, max_action_length)
    likelihood = F.log(target_prob + 1e-7)
    loss = -likelihood * target_action_mask
    # (batch_size)
    loss = F.sum(loss, axis=1)
    return F.mean(loss)
Example #12
def simple_rnn(inputs, units, return_sequences=False, fix_parameters=False):
    '''
    A vanilla recurrent neural network layer
    Args:
        inputs (nnabla.Variable): A shape of [B, SentenceLength, EmbeddingSize].
        units (int): Dimensionality of the output space.
        return_sequences (bool): Whether to return only the last output in the output sequence, or the full sequence.
        fix_parameters (bool): Fix parameters (Set need_grad=False).
    Returns:
        nn.Variable: A shape [B, SentenceLength, units].
        or
        nn.Variable: A shape [B, units]
    '''

    hs = []
    batch_size = inputs.shape[0]
    sentence_length = inputs.shape[1]
    h0 = nn.Variable.from_numpy_array(np.zeros((batch_size, units)))

    inputs = F.split(inputs, axis=1) # split in the direction of sequence

    h = h0
    for x in inputs:
        h = F.tanh(PF.affine(F.concatenate(x, h, axis=1), units, fix_parameters=fix_parameters))
        hs.append(h)

    if return_sequences:
        hs = F.stack(*hs, axis=1)
        return hs
    else:
        return hs[-1]
Example #13
def yolov2_image_coordinate(t_xy, t_wh, biases):
    import numpy as np
    from nnabla.parameter import pop_parameter, set_parameter
    h, w = t_xy.shape[-2:]
    xs = pop_parameter('xs')
    ys = pop_parameter('ys')
    if xs is None or (w != xs.shape[-1]):
        xs = nn.Variable.from_numpy_array(np.arange(w).reshape(1, 1, 1, -1))
        xs.need_grad = False
        set_parameter('xs', xs)
    if ys is None or (h != ys.shape[-2]):
        ys = nn.Variable.from_numpy_array(np.arange(h).reshape(1, 1, -1, 1))
        ys.need_grad = False
        set_parameter('ys', ys)
    t_x, t_y = F.split(t_xy, axis=2)
    oshape = list(t_x.shape)
    oshape.insert(2, 1)
    t_x = F.reshape((t_x + xs) / w, oshape)
    t_y = F.reshape((t_y + ys) / h, oshape)
    pop_parameter('biases')
    biases = biases.reshape(1, biases.shape[0], biases.shape[1], 1,
                            1) / np.array([w, h]).reshape(1, 1, 2, 1, 1)
    b = nn.Variable.from_numpy_array(biases)
    b.need_grad = False
    set_parameter('biases', b)
    t_wh = t_wh * b
    return t_x, t_y, t_wh
Example #14
def make_symmetric_matrix(_x):
    # input
    # _x : type=nn.Variable(), _x.shape=(batch_size, *, *, *)

    # output
    # j_vector : type=nn.Variable(), j_vector.shape=(batch_size, batch_size - 1, *, *, *)

    batch_size = _x.shape[0]
    var_list = F.split(_x)
    concat_list = []
    # --- split & gather components ---
    for i in range(batch_size):
        tmp_list = []
        for j in range(batch_size):
            if i != j:
                tmp_list.append(
                    F.reshape(var_list[j], [
                        1,
                    ] + list(var_list[j].shape)))
        if len(tmp_list) > 1:
            concat_var = F.concatenate(*tmp_list, axis=0)
        else:
            concat_var = tmp_list[0]
        concat_list.append(
            F.reshape(concat_var, [
                1,
            ] + list(concat_var.shape)))
    # --- concatenate ---
    j_vector = F.concatenate(*concat_list, axis=0)
    return j_vector
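A minimal shape-check sketch (not part of the original): for a batch of 3 items, row i of the result stacks the other batch members j != i.

import numpy as np
import nnabla as nn

x = nn.Variable.from_numpy_array(np.random.randn(3, 2, 2).astype(np.float32))
j_vector = make_symmetric_matrix(x)
j_vector.forward()
print(j_vector.shape)  # (3, 2, 2, 2), i.e. (batch_size, batch_size - 1, *, *)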
Example #15
def create_fixed_length_rnn(xs0, h0, w0, w, b, num_layers, nonlinearity,
                            num_directions, with_bias):
    # xs : [T, B, I]
    # h0 : [L, D, B, H]
    # c0 : [L, D, B, H]
    # w0 : [D, H, I+H]
    # w : [L-1, D, H, D * H + H]
    # b : [L, D, H]

    batch_size = xs0.shape[1]
    hidden_size = h0.shape[3]

    if xs0.shape[0] == 1:
        xs = [xs0[0]]
    else:
        xs = F.split(xs0, axis=0)
    hn = []
    for i in range(num_layers):
        wi = w0
        if i > 0:
            wi = w[i - 1]
        # wi : [D, H, ?]
        # Forward direction
        hif = h0[i, 0]  # [B, H]
        wif = wi[0]
        bif = None
        if with_bias:
            bif = b[i, 0]
        hs = []
        for j, x in enumerate(xs):
            # x : [B, I]
            hif = rnn(x, hif, wif, bif, nonlinearity, with_bias)
            hs.append(hif)
        hn.append(hif)

        if num_directions == 1:
            xs = hs
            continue

        # Backward direction
        hib = h0[i, 1]  # [B, H]
        wib = wi[1]
        bib = None
        if with_bias:
            bib = b[i, 1]
        for k, x in enumerate(reversed(xs)):
            j = len(xs) - 1 - k
            # x : [B, I]
            hib = rnn(x, hib, wib, bib, nonlinearity, with_bias)
            hs[j] = F.concatenate(hs[j], hib, axis=1)
        hn.append(hib)
        xs = hs

    ys = xs  # list of [B, HD]
    ys = F.stack(*ys, axis=0)  # [T, B, HD]
    hn = F.reshape(F.stack(*hn, axis=0),
                   (num_layers, num_directions, batch_size,
                    hidden_size))  # LD list of [B, H] --> [L, D, B, H]
    return ys, hn
Example #16
def gru(x, h, w, b, with_bias):
    hidden_size = h.shape[1]
    xh = F.concatenate(*(x, h), axis=1)
    w0, w1, w2 = F.split(w, axis=0)
    b0 = b1 = b2 = b3 = None
    if with_bias:
        b0, b1, b2, b3 = F.split(b, axis=0)
    r_t = F.sigmoid(F.affine(xh, F.transpose(w0, (1, 0)), b0))
    z_t = F.sigmoid(F.affine(xh, F.transpose(w1, (1, 0)), b1))

    w2_0 = w2[:, :w2.shape[1]-hidden_size]
    w2_1 = w2[:, w2.shape[1]-hidden_size:]
    n_t = F.tanh(F.affine(x, F.transpose(w2_0, (1, 0)), b2) +
                 r_t*F.affine(h, F.transpose(w2_1, (1, 0)), b3))
    h_t = (1-z_t)*n_t + z_t*h

    return h_t
Example #17
def split(x, axis=0):
    if x.shape[axis] == 1:
        s = list(x.shape)
        s.pop(axis)
        x = F.broadcast(x, x.shape)
        return [F.reshape(x, s)]
    else:
        return F.split(x, axis=axis)
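A minimal usage sketch (not part of the original): the wrapper always hands back an iterable of per-slice variables, even when the split axis has size 1.

import numpy as np
import nnabla as nn

a = nn.Variable.from_numpy_array(np.arange(6, dtype=np.float32).reshape(1, 6))
parts = split(a, axis=0)   # one-element list: [Variable of shape (6,)]
b = nn.Variable.from_numpy_array(np.arange(6, dtype=np.float32).reshape(2, 3))
parts = split(b, axis=0)   # per-row variables, each of shape (3,)
print(len(parts), parts[0].shape)  # 2 (3,)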
Example #18
 def time_distributed_func(x, *args, **kwargs):
     ret = []
     batch_size = x.shape[0]
     for x_ in F.split(x, axis=1):
         value = func(x_, *args, **kwargs)
         _, output_dim = value.shape
         ret.append(F.reshape(value, (batch_size, 1, output_dim)))
     return F.concatenate(*ret, axis=1)
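This closure looks like the body of a time_distributed decorator (the name is used in Example #19); a hedged sketch of the presumed enclosing wrapper and its use:

import nnabla.functions as F
import nnabla.parametric_functions as PF

def time_distributed(func):
    # Apply func independently to every time step of a (batch, length, dim) input.
    def time_distributed_func(x, *args, **kwargs):
        ret = []
        batch_size = x.shape[0]
        for x_ in F.split(x, axis=1):
            value = func(x_, *args, **kwargs)
            _, output_dim = value.shape
            ret.append(F.reshape(value, (batch_size, 1, output_dim)))
        return F.concatenate(*ret, axis=1)
    return time_distributed_func

# e.g. y = time_distributed(PF.affine)(x, 128)  # (batch, length, in_dim) -> (batch, length, 128)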
Example #19
def multihead_attention(query: nn.Variable,
                        key: nn.Variable,
                        value: nn.Variable,
                        h: int,
                        mask=None,
                        train: bool = True,
                        dropout_ratio: float = 0.1):
    batch_size, sentence_length_query, embedding_size = query.shape
    batch_size, sentence_length_memory, embedding_size = key.shape

    assert embedding_size % h == 0

    q = query
    k = key
    v = value

    dim = embedding_size // h

    with nn.parameter_scope('q_dense'):
        q = time_distributed(PF.affine)(q, embedding_size)
    with nn.parameter_scope('k_dense'):
        k = time_distributed(PF.affine)(k, embedding_size)
    with nn.parameter_scope('v_dense'):
        v = time_distributed(PF.affine)(v, embedding_size)

    q = F.reshape(q, shape=(batch_size, h, sentence_length_query, dim))
    k = F.reshape(k, shape=(batch_size, h, sentence_length_memory, dim))
    v = F.reshape(v, shape=(batch_size, h, sentence_length_memory, dim))

    ret = []
    # for h times
    for _q, _k, _v in zip(F.split(q, axis=1), F.split(k, axis=1),
                          F.split(v, axis=1)):
        ret.append(
            attention(_q,
                      _k,
                      _v,
                      mask=mask,
                      train=train,
                      dropout_ratio=dropout_ratio))

    x = F.concatenate(*ret, axis=2)
    with nn.parameter_scope('concat_dense'):
        x = time_distributed(PF.affine)(x, embedding_size)
    return x
Example #20
File: ops.py Project: sony/nnabla-examples
def guided_filter(img, r, eps):
    """
    Edge preserving filter
    """
    img2 = F.concatenate(img, img * img, axis=3)
    img2 = box_filter(img2, r)
    mean = F.split(img2, axis=3)
    mean_i = F.stack(mean[0], mean[1], mean[2], axis=3)
    mean_ii = F.stack(mean[3], mean[4], mean[5], axis=3)
    var_i = mean_ii - mean_i * mean_i
    a = var_i / (var_i + eps)
    b = mean_i - a * mean_i
    ab = F.concatenate(a, b, axis=3)
    ab = box_filter(ab, r)
    mean_ab = F.split(ab, axis=3)
    mean_a = F.stack(mean_ab[0], mean_ab[1], mean_ab[2], axis=3)
    mean_b = F.stack(mean_ab[3], mean_ab[4], mean_ab[5], axis=3)
    q = mean_a * img + mean_b
    return q
Example #21
def lstm(inputs, units, initial_state=None, return_sequences=False, return_state=False, fix_parameters=False):
    '''
    A long short-term memory (LSTM) layer
    Args:
        inputs (nnabla.Variable): A shape of [B, SentenceLength, EmbeddingSize].
        units (int): Dimensionality of the output space.
        initial_state ([nnabla.Variable, nnabla.Variable]): A tuple of an initial cell and an initial hidden state.
        return_sequences (bool): Whether to return only the last output in the output sequence, or the full sequence.
        return_state (bool): Whether to return the last state, which consists of the cell state and the hidden state.
        fix_parameters (bool): Fix parameters (Set need_grad=False).
    Returns:
        nn.Variable: A shape [B, SentenceLength, units].
        or
        nn.Variable: A shape [B, units]
    '''
    
    batch_size = inputs.shape[0]

    if initial_state is None:
        c0 = nn.Variable.from_numpy_array(np.zeros((batch_size, units)))
        h0 = nn.Variable.from_numpy_array(np.zeros((batch_size, units)))
    else:
        assert type(initial_state) is tuple or type(initial_state) is list, \
               'initial_state must be a tuple or a list.'
        assert len(initial_state) == 2, \
               'initial_state must have exactly two states.'

        c0, h0 = initial_state

        assert c0.shape == h0.shape, 'shapes of initial_state must be the same.'
        assert c0.shape[0] == batch_size, \
               'batch size of initial_state ({0}) is different from that of inputs ({1}).'.format(c0.shape[0], batch_size)
        assert c0.shape[1] == units, \
               'units size of initial_state ({0}) is different from the units argument ({1}).'.format(c0.shape[1], units)

    cell = c0
    hidden = h0

    hs = []

    for x in F.split(inputs, axis=1):
        cell, hidden = lstm_cell(x, cell, hidden)
        hs.append(hidden)

    if return_sequences:
        ret = F.stack(*hs, axis=1)
    else:
        ret = hs[-1]

    if return_state:
        return ret, cell, hidden
    else:
        return ret
Example #22
 def network(self, x_in, name='LSTM', n_hidden=32):
     hlist = []
     for x_i in F.split(x_in, axis=1):
         self._h, self._c = self._lstm_cell(name, n_hidden, x_i, self._h, self._c)
         with nn.parameter_scope(name + '_Affine_2'):
             self._h = PF.affine(self._h, (self._cols_size,))
         hlist.append(self._h)
     h = F.stack(*hlist, axis=1)
     h = F.slice(h, start=[0, h.shape[1]-self._x_output_length, 0],
             stop=[self._batch_size, h.shape[1], self._cols_size],
             step=[1, 1, 1])
     return h
Example #23
def LSTM(inputs, units, return_sequences=False, name='lstm'):
    '''
    A long short-term memory layer
    Args:
        inputs (nnabla.Variable): A shape of [B, SentenceLength, EmbeddingSize].
        units (int): Dimensionality of the output space.
        return_sequences (bool): Whether to return only the last output in the output sequence, or the full sequence.
    Returns:
        nn.Variable: A shape [B, SentenceLength, units].
        or
        nn.Variable: A shape [B, units]
    '''

    hs = []
    batch_size = inputs.shape[0]
    sentence_length = inputs.shape[1]
    c0 = nn.Variable.from_numpy_array(np.zeros((batch_size, units)))
    h0 = nn.Variable.from_numpy_array(np.zeros((batch_size, units)))

    inputs = F.split(inputs, axis=1)

    cell = c0
    hidden = h0

    with nn.parameter_scope(name):
        for x in inputs:
            a = F.tanh(
                PF.affine(x, units, with_bias=False, name='Wa') +
                PF.affine(hidden, units, name='Ra'))
            input_gate = F.sigmoid(
                PF.affine(x, units, with_bias=False, name='Wi') +
                PF.affine(hidden, units, name='Ri'))
            forget_gate = F.sigmoid(
                PF.affine(x, units, with_bias=False, name='Wf') +
                PF.affine(hidden, units, name='Rf'))
            cell = input_gate * a + forget_gate * cell
            output_gate = F.sigmoid(
                PF.affine(x, units, with_bias=False, name='Wo') +
                PF.affine(hidden, units, name='Ro'))
            hidden = output_gate * F.tanh(cell)
            if return_sequences:
                hidden = F.reshape(hidden, (batch_size, 1, units))
            hs.append(hidden)

    if return_sequences:
        hs = F.concatenate(*hs, axis=1)
        hs = F.reshape(hs, (batch_size, sentence_length, units))
        return hs
    else:
        return hs[-1]
Example #24
        def conv_bn_relu(h, i, name, skip=True):
            s = h
            imaps = h.shape[1]
            with nn.parameter_scope(name):
                h = PF.convolution(h, imaps, (3, 3), pad=(1, 1))
                h = PF.batch_normalization(h)
                h = F.relu(h)
            if not skip:
                return F.concatenate(*[h, s], axis=1) if i % 2 == 0 else h + s

            h = F.split(h, axis=1)
            h = [h_.reshape(h_.shape[:1] + (1, ) + h_.shape[1:]) for h_ in h]
            h = F.concatenate(*h, axis=1)
            return h
Example #25
File: rnn.py Project: Pandinosaurus/nnabla
def _lstm(x, h, c, w, b, with_bias):
    """LSTM cell.
    Args:
        x (:obj:`~nnabla.Variable`): Input data.
        h (:obj:`~nnabla.Variable`): Short-term state.
        c (:obj:`~nnabla.Variable`): Long-term state.
        w (:obj:`~nnabla.Variable`): Weight.
        b (:obj:`~nnabla.Variable`): Bias.
        with_bias (bool): Include the bias or not.
    """
    hidden_size = h.shape[1]
    xh = F.concatenate(*(x, h), axis=1)
    w0, w1, w2, w3 = F.split(w, axis=0)
    b0 = b1 = b2 = b3 = None
    if with_bias:
        b0, b1, b2, b3 = F.split(b, axis=0)
    i_t = F.affine(xh, F.transpose(w0, (1, 0)), b0)
    f_t = F.affine(xh, F.transpose(w1, (1, 0)), b1)
    g_t = F.affine(xh, F.transpose(w2, (1, 0)), b2)
    o_t = F.affine(xh, F.transpose(w3, (1, 0)), b3)
    c_t = F.sigmoid(f_t) * c + F.sigmoid(i_t) * F.tanh(g_t)
    h_t = F.sigmoid(o_t) * F.tanh(c_t)

    return h_t, c_t
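A minimal usage sketch (not part of the original example): the F.split calls above imply w of shape (4, hidden_size, input_size + hidden_size) and b of shape (4, hidden_size).

import numpy as np
import nnabla as nn

batch_size, input_size, hidden_size = 4, 8, 16
x = nn.Variable.from_numpy_array(np.random.randn(batch_size, input_size).astype(np.float32))
h = nn.Variable.from_numpy_array(np.zeros((batch_size, hidden_size), dtype=np.float32))
c = nn.Variable.from_numpy_array(np.zeros((batch_size, hidden_size), dtype=np.float32))
w = nn.Variable.from_numpy_array(
    np.random.randn(4, hidden_size, input_size + hidden_size).astype(np.float32))
b = nn.Variable.from_numpy_array(np.zeros((4, hidden_size), dtype=np.float32))

h_t, c_t = _lstm(x, h, c, w, b, with_bias=True)
nn.forward_all([h_t, c_t])
print(h_t.shape, c_t.shape)  # (4, 16) (4, 16)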
Example #26
def LSTM(inputs,
         units,
         initial_state=None,
         return_sequences=False,
         return_state=False,
         name='lstm'):

    batch_size = inputs.shape[0]

    if initial_state is None:

        c0 = nn.Variable.from_numpy_array(np.zeros((batch_size, units)),
                                          need_grad=True)
        h0 = nn.Variable.from_numpy_array(np.zeros((batch_size, units)),
                                          need_grad=True)
    else:
        assert type(initial_state) is tuple or type(initial_state) is list, \
               'initial_state must be a tuple or a list.'
        assert len(initial_state) == 2, \
               'initial_state must have exactly two states.'

        c0, h0 = initial_state

        assert c0.shape == h0.shape, 'shapes of initial_state must be the same.'
        assert c0.shape[0] == batch_size, \
               'batch size of initial_state ({0}) is different from that of inputs ({1}).'.format(c0.shape[0], batch_size)
        assert c0.shape[1] == units, \
               'units size of initial_state ({0}) is different from the units argument ({1}).'.format(c0.shape[1], units)

    cell = c0
    hidden = h0

    hs = []

    for x in F.split(inputs, axis=1):
        with nn.parameter_scope(name):
            cell, hidden = LSTMCell(x, cell, hidden)
        hs.append(hidden)

    if return_sequences:
        ret = F.stack(*hs, axis=1)
    else:
        ret = hs[-1]

    if return_state:
        return ret, cell, hidden
    else:
        return ret
Example #27
def stack_backward(inputs, axis=0):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    yshape = dy.shape
    if yshape[axis] == 1:
        reshape = yshape[:axis] + yshape[axis + 1:]
        return F.reshape(dy, reshape, inplace=False)
    dx_list = F.split(dy, axis=axis)
    return dx_list
Example #28
def create_network(batchsize, imheight, imwidth, args):
    import gc
    gc.collect()
    nnabla_ext.cuda.clear_memory_cache()

    anchors = args.num_anchors
    classes = args.num_classes
    yolo_x = nn.Variable((batchsize, 3, imheight, imwidth))
    yolo_features = yolov2.yolov2(yolo_x, anchors, classes, test=False)

    nB = yolo_features.shape[0]
    nA = args.num_anchors
    nC = args.num_classes
    nH = yolo_features.shape[2]
    nW = yolo_features.shape[3]

    output = yolo_features.get_unlinked_variable(need_grad=True)
    # TODO: Workaround until v1.0.2.
    # Explicitly enable grad since need_grad option above didn't work.
    output.need_grad = True

    output = F.reshape(output, (nB, nA, (5 + nC), nH, nW))
    output_splitted = F.split(output, 2)
    x, y, w, h, conf = [v.reshape((nB, nA, nH, nW))
                        for v in output_splitted[0:5]]
    x, y, conf = map(F.sigmoid, [x, y, conf])

    cls = F.stack(*output_splitted[5:], axis=2)
    cls = cls.reshape((nB*nA, nC, nH*nW))
    cls = F.transpose(cls, [0, 2, 1]).reshape((nB*nA*nH*nW, nC))

    tx, ty, tw, th, tconf, coord_mask, conf_mask_sq = [
        nn.Variable(v.shape) for v in [x, y, w, h, conf, x, conf]]
    cls_ones, cls_mask = [nn.Variable(cls.shape) for _ in range(2)]
    tcls, cls_mask_bb = [nn.Variable((cls.shape[0], 1)) for _ in range(2)]

    coord_mask_sq = F.pow_scalar(coord_mask, 2)
    loss_x = args.coord_scale * F.sum(F.squared_error(x, tx) * coord_mask_sq)
    loss_y = args.coord_scale * F.sum(F.squared_error(y, ty) * coord_mask_sq)
    loss_w = args.coord_scale * F.sum(F.squared_error(w, tw) * coord_mask_sq)
    loss_h = args.coord_scale * F.sum(F.squared_error(h, th) * coord_mask_sq)
    loss_conf = F.sum(F.squared_error(conf, tconf) * conf_mask_sq)
    loss_cls = args.class_scale * \
        F.sum(cls_mask_bb * F.softmax_cross_entropy(cls + cls_ones - cls_mask, tcls))
    loss_nnabla = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

    return yolo_x, yolo_features, (x, y, w, h, conf, cls), (tx, ty, tw, th, tconf, coord_mask, conf_mask_sq, cls_ones, cls_mask, tcls, cls_mask_bb), loss_nnabla
Example #29
    def time_distributed_func(x, *args, **kwargs):
        ret = []
        batch_size = x.shape[0]
        length = x.shape[1]
        dim = x.shape[2] if x.ndim > 2 else 1
        if length > 1:
            xs = F.split(x, axis=1)
        else:
            xs = [F.reshape(x, (batch_size, dim))]
        for x_ in xs:
            value = func(x_, *args, **kwargs)
            _, output_dim = value.shape
            ret.append(F.reshape(value, (batch_size, 1, output_dim)))

        if length > 1:
            return F.concatenate(*ret, axis=1)
        else:
            return ret[0]
Example #30
    def backward_impl(self, inputs, outputs, prop_down, accum):
        # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
        # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

        # Args
        axis = self.forward_func.info.args["axis"]

        # Compute
        # w.r.t. dy_{0}, ..., dy_{N-1}
        g_dx = outputs[0].grad
        g_dy_list = list(F.split(g_dx, axis))
        g_dy_list.reverse()
        for i in range(len(inputs[1:])):
            g_dy = inputs[i + 1].grad
            g_dy_ = g_dy_list[i]
            if prop_down[i + 1]:
                if accum[i + 1]:
                    g_dy += g_dy_
                else:
                    g_dy.copy_from(g_dy_)