Code example #1
def adjust_kp(kp_source,
              kp_driving,
              kp_driving_initial,
              adapt_movement_scale=1,
              use_relative_movement=False,
              use_relative_jacobian=False):

    kp_new = {k: v for k, v in kp_driving.items()}

    if use_relative_movement:
        kp_value_diff = (kp_driving['value'] - kp_driving_initial['value'])
        kp_value_diff *= adapt_movement_scale
        kp_new['value'] = kp_value_diff + kp_source['value']

        if use_relative_jacobian:
            jacobian_diff = F.batch_matmul(
                kp_driving['jacobian'],
                F.reshape(
                    F.batch_inv(
                        F.reshape(kp_driving_initial['jacobian'], (-1, ) +
                                  kp_driving_initial['jacobian'].shape[-2:],
                                  inplace=False)),
                    kp_driving_initial['jacobian'].shape))
            kp_new['jacobian'] = F.batch_matmul(jacobian_diff,
                                                kp_source['jacobian'])

    return kp_new
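A minimal usage sketch (not from the original file): keypoint dicts are assumed to follow the first-order-motion-model convention, with values of shape (B, K, 2) and Jacobians of shape (B, K, 2, 2).

import numpy as np
import nnabla as nn
import nnabla.functions as F

def _dummy_kp(B=2, K=10):
    # Hypothetical keypoint dict matching the shapes adjust_kp expects;
    # Jacobians are biased toward the identity so batch_inv stays well-conditioned.
    return {'value': nn.Variable.from_numpy_array(
                np.random.randn(B, K, 2).astype(np.float32)),
            'jacobian': nn.Variable.from_numpy_array(
                (0.1 * np.random.randn(B, K, 2, 2) + np.eye(2)).astype(np.float32))}

kp_new = adjust_kp(_dummy_kp(), _dummy_kp(), _dummy_kp(),
                   adapt_movement_scale=0.5,
                   use_relative_movement=True,
                   use_relative_jacobian=True)
kp_new['value'].forward()
print(kp_new['value'].shape)  # (2, 10, 2)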
Code example #2
File: model.py Project: sony/nnabla-examples
    def warp_coordinates(self, coordinates):
        theta = self.theta
        theta = F.reshape(
            theta, theta.shape[:1] + (1,) + theta.shape[1:], inplace=False)
        if coordinates.shape[0] == self.bs:
            transformed = F.batch_matmul(
                            F.tile(theta[:, :, :, :2],
                                   (1, coordinates.shape[1], 1, 1)),
                            F.reshape(coordinates, coordinates.shape + (1,), inplace=False)) + theta[:, :, :, 2:]
        else:
            transformed = F.batch_matmul(
                            F.tile(theta[:, :, :, :2],
                                   (1, coordinates.shape[1], 1, 1)),
                            F.tile(F.reshape(coordinates, coordinates.shape + (1,), inplace=False),
                                   (self.bs // coordinates.shape[0], 1, 1, 1))) + theta[:, :, :, 2:]
        transformed = F.reshape(
            transformed, transformed.shape[:-1], inplace=False)

        if self.tps:
            control_points = self.control_points
            control_params = self.control_params
            distances = F.reshape(
                coordinates, (coordinates.shape[0], -1, 1, 2), inplace=False) - F.reshape(control_points, (1, 1, -1, 2))
            distances = F.sum(F.abs(distances), axis=distances.ndim - 1)

            result = distances ** 2
            result = result * F.log(distances + 1e-6)
            result = result * control_params
            result = F.sum(result, axis=2)
            result = F.reshape(
                result, (self.bs, coordinates.shape[1], 1), inplace=False)
            transformed = transformed + result

        return transformed
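The TPS branch above applies the thin-plate-spline radial kernel U(r) = r^2 log(r), with a 1e-6 guard against log(0), to the distances between coordinates and control points. A small NumPy sketch of just that kernel, for intuition:

import numpy as np

def tps_kernel(r, eps=1e-6):
    # U(r) = r^2 * log(r); eps avoids log(0), exactly as in the code above.
    return (r ** 2) * np.log(r + eps)

print(tps_kernel(np.array([0.5, 1.0, 2.0])))  # approx. [-0.1733, 0.0, 2.7726]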
Code example #3
def attnblock(h, r=8, fix_parameters=False, sn=True, test=False):
    """Attention block"""
    x = h

    # 1x1 convolutions
    b, c, s0, s1 = h.shape
    c_r = c // r
    assert c_r > 0
    f_x = convolution(h, c_r, kernel=(1, 1), pad=(0, 0), stride=(1, 1), name="f",
                      with_bias=False, sn=sn, test=test)
    g_x = convolution(h, c_r, kernel=(1, 1), pad=(0, 0), stride=(1, 1), name="g",
                      with_bias=False, sn=sn, test=test)
    h_x = convolution(h, c, kernel=(1, 1), pad=(0, 0), stride=(1, 1), name="h",
                      with_bias=False, sn=sn, test=test)

    # Attend
    attn = F.batch_matmul(f_x.reshape(
        [b, c_r, -1]), g_x.reshape([b, c_r, -1]), transpose_a=True)
    attn = F.softmax(attn, 1)
    h_x = h_x.reshape([b, c, -1])
    o = F.batch_matmul(h_x, attn)
    o = F.reshape(o, [b, c, s0, s1])

    # Shortcut
    gamma = get_parameter_or_create(
        "gamma", [1, 1, 1, 1], ConstantInitializer(0.), not fix_parameters)
    y = gamma * o + x
    return y
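A standalone shape walk-through of the attention product above; PF.convolution stands in for the example's spectral-normalized convolution helper, so this sketch only illustrates the reshape/batch_matmul pattern:

import numpy as np
import nnabla as nn
import nnabla.functions as F
import nnabla.parametric_functions as PF

b, c, s0, s1, r = 2, 16, 8, 8, 8
h = nn.Variable.from_numpy_array(np.random.randn(b, c, s0, s1).astype(np.float32))
with nn.parameter_scope("attn_demo"):
    f_x = PF.convolution(h, c // r, kernel=(1, 1), with_bias=False, name="f")
    g_x = PF.convolution(h, c // r, kernel=(1, 1), with_bias=False, name="g")
# (b, c/r, s0*s1)^T x (b, c/r, s0*s1) -> (b, s0*s1, s0*s1)
attn = F.batch_matmul(f_x.reshape([b, c // r, -1]),
                      g_x.reshape([b, c // r, -1]), transpose_a=True)
print(attn.shape)  # (2, 64, 64)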
Code example #4
def attn_block(x, name, num_heads=4, fix_parameters=False):
    """Multihead attention block"""
    B, C, H, W = x.shape

    with nn.parameter_scope(name):
        # Get query, key, value
        h = normalize(x, name="norm")
        # nin(3 * C) -> split is faster?
        q = nin(h, C, name="q")
        k = nin(h, C, name="k")
        v = nin(h, C, name="v")

        # Attention
        w = F.batch_matmul(F.reshape(q, (B * num_heads, -1, H * W)),
                           F.reshape(k, (B * num_heads, -1, H * W)),
                           transpose_a=True)
        w = F.mul_scalar(w, int(C)**(-0.5), inplace=True)

        assert w.shape == (B * num_heads, H * W, H * W)
        w = F.softmax(w, axis=-1)

        h = F.reshape(v, (B * num_heads, -1, H * W))
        h = F.batch_matmul(h, w)
        h = F.reshape(h, (B, C, H, W))

        # output projection
        h = nin(h, C, name='proj_out', zeroing_w=True)

    assert h.shape == x.shape
    return F.add2(h, x, inplace=True)
Code example #5
File: model.py Project: sony/nnabla-examples
def _scaled_dot_product_attention(q, k, v, attn_mask, dropout):
    B, Nt, E = q.shape
    q *= float(E)**-0.5
    # (B, Nt, E) x (B, E, Ns) -> (B, Nt, Ns)
    attn = F.batch_matmul(q, k, transpose_b=True)
    if attn_mask is not None:
        attn += attn_mask
    attn_output_weights = F.softmax(attn, axis=len(attn.shape) - 1)
    if dropout > 0.0:
        # Dropout must act on the softmaxed weights that are actually used below
        # (the original snippet dropped out the unused pre-softmax logits).
        attn_output_weights = F.dropout(attn_output_weights, p=dropout)
    # (B, Nt, Ns) x (B, Ns, E) -> (B, Nt, E)
    attn_output = F.batch_matmul(attn_output_weights, v)
    return attn_output, attn_output_weights
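A minimal call sketch (arbitrary sizes, no mask, no dropout):

import numpy as np
import nnabla as nn

B, Nt, Ns, E = 2, 5, 7, 16
q = nn.Variable.from_numpy_array(np.random.randn(B, Nt, E).astype(np.float32))
k = nn.Variable.from_numpy_array(np.random.randn(B, Ns, E).astype(np.float32))
v = nn.Variable.from_numpy_array(np.random.randn(B, Ns, E).astype(np.float32))
out, weights = _scaled_dot_product_attention(q, k, v, attn_mask=None, dropout=0.0)
print(out.shape, weights.shape)  # (2, 5, 16) (2, 5, 7)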
Code example #6
def batch_matmul_backward(inputs, transpose_a=False, transpose_b=False):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dc = inputs[0]
    a = inputs[1]
    b = inputs[2]
    if (transpose_a, transpose_b) == (True, True):
        da = F.batch_matmul(b, dc, True, True)
        db = F.batch_matmul(dc, a, True, True)
    elif (transpose_a, transpose_b) == (True, False):
        da = F.batch_matmul(b, dc, False, True)
        db = F.batch_matmul(a, dc, False, False)
    elif (transpose_a, transpose_b) == (False, True):
        da = F.batch_matmul(dc, b, False, False)
        db = F.batch_matmul(dc, a, True, False)
    elif (transpose_a, transpose_b) == (False, False):
        da = F.batch_matmul(dc, b, False, True)
        db = F.batch_matmul(a, dc, True, False)
    da = _sum(da, a)
    db = _sum(db, b)
    return da, db
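The four cases follow from the chain rule for C = op_a(A) @ op_b(B); with no transposes, dA = dC B^T and dB = A^T dC. A NumPy sanity check of the (False, False) case:

import numpy as np

B, M, K, N = 2, 3, 4, 5
a = np.random.randn(B, M, K)
b = np.random.randn(B, K, N)
dc = np.random.randn(B, M, N)
da = dc @ np.transpose(b, (0, 2, 1))  # batch_matmul(dc, b, False, True)
db = np.transpose(a, (0, 2, 1)) @ dc  # batch_matmul(a, dc, True, False)
print(da.shape == a.shape, db.shape == b.shape)  # True True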
Code example #7
def batch_inv_backward(inputs):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    x0_inv = get_output(x0, "BatchInv")
    t01 = -F.batch_matmul(x0_inv, dy, True, False)
    dx0 = F.batch_matmul(t01, x0_inv, False, True)
    return dx0
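This implements dX = -X^{-T} dY X^{-T}, which follows from the identity d(X^{-1}) = -X^{-1} dX X^{-1}. A finite-difference check in NumPy:

import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((3, 3)) + 3.0 * np.eye(3)  # keep X well-conditioned
dY = rng.standard_normal((3, 3))                   # upstream grad w.r.t. Y = inv(X)
Xinv = np.linalg.inv(X)
dX = -Xinv.T @ dY @ Xinv.T                         # analytic, as in the code above
eps, num = 1e-6, np.zeros_like(X)
for i in range(3):
    for j in range(3):
        E = np.zeros((3, 3)); E[i, j] = eps
        num[i, j] = np.sum(dY * (np.linalg.inv(X + E) - Xinv)) / eps
print(np.allclose(dX, num, atol=1e-4))  # True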
Code example #8
    def compute_context(prev_state):
        batch_size = prev_state.shape[0]
        ht = PF.affine(prev_state,
                       attention_units,
                       with_bias=False,
                       name='Waht')
        # -> (batch_size, attention_units)
        ht = F.reshape(ht, (batch_size, 1, attention_units))
        # -> (batch_size, 1, attention_units)
        ht = F.broadcast(ht,
                         (batch_size, sentence_length_source, attention_units))
        # -> (batch_size, sentence_length_source, attention_units)

        attention = F.tanh(hs + ht)
        # -> (batch_size, sentence_length_source, attention_units)
        attention = time_distributed(PF.affine)(attention,
                                                1,
                                                with_bias=False,
                                                name='attention')
        # -> (batch_size, sentence_length_source, 1)
        attention = F.softmax(attention, axis=1)
        # -> (batch_size, sentence_length_source, 1)

        context = F.batch_matmul(hs, attention, transpose_a=True)
        context = F.reshape(context, (batch_size, attention_units))

        return context
Code example #9
def classification_loss_with_orthogonal_loss(
        pred_logit: nn.Variable,
        label: nn.Variable,
        transformation_mat: nn.Variable,
        reg_weight=0.001) -> Tuple[nn.Variable, Dict[str, nn.Variable]]:
    """classification loss with orthogonal loss

    Args:
        pred_logit (nn.Variable): pred logit, shape(batch, num_classes)
        label (nn.Variable): label, shape(batch, 1)
        transformation_mat (nn.Variable): transformation matrix, shape(batch, K, K)

    Returns:
        Tuple[nn.Variable, Dict[str, nn.Variable]]: loss and internal loss
    """
    cross_entropy_loss = F.softmax_cross_entropy(pred_logit, label)
    classify_loss = F.mean(cross_entropy_loss)

    # Enforce the transformation as orthogonal matrix
    mat_squared = F.batch_matmul(transformation_mat,
                                 F.transpose(transformation_mat, (0, 2, 1)))
    batch_size, k, _ = transformation_mat.shape
    target_array = np.tile(np.eye(k, dtype=np.float32), (batch_size, 1, 1))
    target = nn.Variable.from_numpy_array(target_array)
    mat_diff = mat_squared - target

    # Frobenius norm
    mat_diff = F.reshape(mat_diff, (batch_size, -1))
    mat_loss = F.mean(F.norm(mat_diff, axis=1))

    return classify_loss + mat_loss * reg_weight, {
        "classify_loss": classify_loss,
        "mat_loss": mat_loss,
        "mat_diff": mat_diff,
    }
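A minimal sketch of driving this loss with hypothetical sizes; an exactly orthogonal transformation matrix should give a near-zero mat_loss:

import numpy as np
import nnabla as nn

batch, num_classes, K = 4, 10, 3
pred = nn.Variable.from_numpy_array(
    np.random.randn(batch, num_classes).astype(np.float32))
label = nn.Variable((batch, 1))
label.d = np.random.randint(0, num_classes, (batch, 1))
tmat = nn.Variable.from_numpy_array(
    np.tile(np.eye(K, dtype=np.float32), (batch, 1, 1)))
loss, internal = classification_loss_with_orthogonal_loss(pred, label, tmat)
loss.forward()
print(internal["mat_loss"].d)  # ~0, since R @ R^T == I for an orthogonal R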
Code example #10
File: ops.py Project: shikisawamura/nnabla-examples
def Bahdanau_attention(query, values, out_features, scope):
    r"""Return the Bahdanau attention mechanism.

    Args:
        query (nn.Variable): A query of size (B, 1, C).
        values (nn.Variable): Values of size (B, T, C).
        out_features (int): The projected dimensionality.
        scope (str): Parameter scope.

    Returns:
        nn.Variable: The context vector.
        nn.Variable: The attention weight vector.
    """
    with nn.parameter_scope(scope):
        x = PF.affine(query, out_features, base_axis=2,
                      with_bias=False, name='query')
        y = PF.affine(values, out_features, base_axis=2,
                      with_bias=False, name='values')
        # scores of shape (B, T, 1)
        scores = PF.affine(F.tanh(x + y), 1, base_axis=2,
                           with_bias=False, name='scores')
        # attention_weights of shape (B, 1, T)
        attention_weights = F.softmax(
            scores, axis=1).reshape((query.shape[0], 1, -1))
        # context_vector shape after sum == (B, 1, C)
        context_vector = F.batch_matmul(attention_weights, values)

    return context_vector, attention_weights
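A minimal call sketch (B, T, C arbitrary; shapes follow the docstring):

import numpy as np
import nnabla as nn

B, T, C = 2, 6, 32
query = nn.Variable.from_numpy_array(np.random.randn(B, 1, C).astype(np.float32))
values = nn.Variable.from_numpy_array(np.random.randn(B, T, C).astype(np.float32))
context, weights = Bahdanau_attention(query, values, out_features=64, scope="attn")
print(context.shape, weights.shape)  # (2, 1, 32) (2, 1, 6)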
Code example #11
File: ops.py Project: sony/nnabla-examples
def dyn_2d_filter(x, lf_2d, k_sz):
    """
    Dynamic 2d filtering
    """
    with nn.parameter_scope('Dynamic_2D_Filtering'):
        f_localexpand = nn.Variable.from_numpy_array(
            np.eye(k_sz[0] * k_sz[1], k_sz[0] * k_sz[1]))
        f_localexpand = F.reshape(
            f_localexpand,
            (k_sz[0], k_sz[1], 1, k_sz[0] * k_sz[1]))  # e.g. (9, 9, 1, 81)
        f_localexpand = F.transpose(f_localexpand, (3, 0, 1, 2))  # e.g. (81, 9, 9, 1)
        x_sz = x.shape
        x = F.reshape(x, (x_sz[0], x_sz[1], x_sz[2], 1))  # (1,100,170,1)
        x_localexpand = F.convolution(x,
                                      f_localexpand,
                                      stride=(1, 1),
                                      pad=(4, 4),
                                      channel_last=True)  # (1,100,170,81)
        x_le_sz = x_localexpand.shape
        x_localexpand = F.reshape(
            x_localexpand, (x_le_sz[0], x_le_sz[1], x_le_sz[2], 1, x_le_sz[3]))
        y = F.batch_matmul(x_localexpand, lf_2d)
        y_sz = y.shape
        y = F.reshape(y, (y_sz[0], y_sz[1], y_sz[2], y_sz[4]))
    return y
Code example #12
def sample_noise(inpt_size, out_size):
    _f = lambda x: F.sign(x) * F.pow_scalar(F.abs(x), 0.5)
    noise = _f(F.randn(shape=(inpt_size + out_size, )))
    eps_w = F.batch_matmul(F.reshape(noise[:inpt_size], (1, -1)),
                           F.reshape(noise[inpt_size:], (1, -1)), True)
    eps_b = noise[inpt_size:]
    return eps_w, eps_b
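This is the factorized-Gaussian noise trick used in NoisyNet-style exploration: f(x) = sign(x) * sqrt(|x|) is applied to a single noise vector, and the outer product of its two halves yields the weight noise. A small sketch; note that both outputs share one F.randn node, so they are forwarded together through a sink to keep a single noise draw:

import nnabla.functions as F

eps_w, eps_b = sample_noise(inpt_size=4, out_size=3)
F.sink(eps_w, eps_b).forward()  # one forward pass, one shared noise sample
print(eps_w.shape, eps_b.shape)  # (4, 3) (3,)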
Code example #13
File: model.py Project: sony/nnabla-examples
def logits(image, text):
    image_features = encode_image(image)
    text_features = encode_text(text)

    # normalized features
    image_features = image_features / \
        F.norm(image_features, axis=1, keepdims=True)
    text_features = text_features / \
        F.norm(text_features, axis=1, keepdims=True)

    # cosine similarity as logits
    logit_scale = nn.parameter.get_parameter_or_create(name='logit_scale',
                                                       shape=())
    logit_scale = F.exp(logit_scale)

    image_features = image_features.reshape(
        (1, image_features.shape[0], image_features.shape[1]))
    text_features = F.transpose(text_features, (1, 0))
    text_features = text_features.reshape(
        (1, text_features.shape[0], text_features.shape[1]))

    per_image = F.batch_matmul(image_features, text_features).reshape(
        (image_features.shape[0], -1))
    logits_per_image = logit_scale.reshape((1, 1)) * per_image

    logits_per_text = F.transpose(logits_per_image, (1, 0))

    # shape = [global_batch_size, global_batch_size]
    return logits_per_image, logits_per_text
Code example #14
def transform(point, center, scale, resolution, invert=False):
    """Generate and affine transformation matrix.

    Given a set of points, a center, a scale and a targer resolution, the
    function generates and affine transformation matrix. If invert is ``True``
    it will produce the inverse transformation.

    Arguments:
        point {numpy.array} -- the input 2D point
        center {numpy.array} -- the center around which to perform the transformations
        scale {float} -- the scale of the face/object
        resolution {float} -- the output resolution

    Keyword Arguments:
        invert {bool} -- define wherever the function should produce the direct or the
        inverse transformation matrix (default: {False})
    """
    point.append(1)

    h = 200.0 * scale
    t = F.matrix_diag(F.constant(1, [3]))
    t.d[0, 0] = resolution / h
    t.d[1, 1] = resolution / h
    t.d[0, 2] = resolution * (-center[0] / h + 0.5)
    t.d[1, 2] = resolution * (-center[1] / h + 0.5)

    if invert:
        t = F.reshape(F.batch_inv(F.reshape(t, [1, 3, 3])), [3, 3])

    _pt = nn.Variable.from_numpy_array(point)

    new_point = F.reshape(F.batch_matmul(
        F.reshape(t, [1, 3, 3]), F.reshape(_pt, [1, 3, 1])), [3, ])[0:2]

    return new_point.d.astype(int)
Code example #15
File: model.py Project: sony/nnabla-examples
def vision_transformer(x, input_res, patch_size, v_width, v_layers, v_heads,
                       embed_dim):
    scale = v_width**-0.5

    with nn.parameter_scope("visual"):
        con1_w = nn.parameter.get_parameter_or_create(name="conv1/W",
                                                      shape=(v_width, 3,
                                                             patch_size,
                                                             patch_size))
        x = F.convolution(
            x, con1_w, bias=None,
            stride=(patch_size, patch_size))  # shape = [*, width, grid, grid]

        # shape = [*, width, grid ** 2]
        x = F.reshape(x, (x.shape[0], x.shape[1], -1))
        x = F.transpose(x, (0, 2, 1))  # shape = [*, grid ** 2, width]

        z = np.zeros((x.shape[0], 1, x.shape[-1]))
        zeros = nn.Variable.from_numpy_array(z)
        class_embed = nn.parameter.get_parameter_or_create(
            name="class_embedding", shape=(v_width, )).reshape(
                (x.shape[0], 1, v_width))
        # shape = [*, grid ** 2 + 1, width]
        x = F.concatenate(class_embed + zeros, x, axis=1)

        positional_embedding = nn.parameter.get_parameter_or_create(
            name='positional_embedding',
            shape=((input_res // patch_size)**2 + 1, v_width)).reshape(
                (x.shape[0], x.shape[1], v_width))
        x = x + positional_embedding

        ln_pre_w = nn.parameter.get_parameter_or_create(
            name="ln_pre/W", shape=(v_width, )).reshape((1, 1, v_width))
        ln_pre_b = nn.parameter.get_parameter_or_create(
            name="ln_pre/b", shape=(v_width, )).reshape((1, 1, v_width))
        x = F.layer_normalization(x, ln_pre_b, ln_pre_w, batch_axis=(0, 1))

        x = F.transpose(x, (1, 0, 2))  # NLD -> LND

        x = transformer(x, v_width, v_layers, v_heads)

        x = F.transpose(x, (1, 0, 2))  # LND -> NLD

        ln_post_w = nn.parameter.get_parameter_or_create(
            name="ln_post/W", shape=(v_width, )).reshape((1, 1, v_width))
        ln_post_b = nn.parameter.get_parameter_or_create(
            name="ln_post/b", shape=(v_width, )).reshape((1, 1, v_width))
        x = F.slice(x, stop=(x.shape[0], 1, x.shape[2]))
        x = F.layer_normalization(x, ln_post_b, ln_post_w)

        if 'proj' in nn.get_parameters():
            visual_proj = nn.parameter.get_parameter_or_create(
                name="proj", shape=(v_width, embed_dim)).reshape(
                    (1, v_width, -1))
            x = F.batch_matmul(x, visual_proj)

        x = x.reshape((-1, embed_dim))

    return x
Code example #16
File: train.py Project: satopirka/nlp-nnabla
def build_self_attention_model(train=True):
    x = nn.Variable((batch_size, max_len))
    t = nn.Variable((batch_size, 1))
    mask = get_mask(x)
    attention_mask = (F.constant(1, shape=mask.shape) - mask) * F.constant(
        np.finfo(np.float32).min, shape=mask.shape)
    with nn.parameter_scope('embedding'):
        h = time_distributed(PF.embed)(x, vocab_size, embedding_size) * mask
    with nn.parameter_scope('forward'):
        h_f = lstm(h,
                   hidden_size,
                   mask=mask,
                   return_sequences=True,
                   return_state=False)
    with nn.parameter_scope('backward'):
        h_b = lstm(h[:, ::-1, ],
                   hidden_size,
                   mask=mask,
                   return_sequences=True,
                   return_state=False)[:, ::-1, ]
    h = F.concatenate(h_f, h_b, axis=2)
    if train:
        h = F.dropout(h, p=dropout_ratio)
    with nn.parameter_scope('da'):
        a = F.tanh(time_distributed(PF.affine)(h, da))
        if train:
            a = F.dropout(a, p=dropout_ratio)
    with nn.parameter_scope('r'):
        a = time_distributed(PF.affine)(a, r)
        if train:
            a = F.dropout(a, p=dropout_ratio)
        a = F.softmax(a + attention_mask, axis=1)
    m = F.batch_matmul(a, h, transpose_a=True)
    with nn.parameter_scope('output_mlp'):
        output = F.relu(PF.affine(m, output_mlp_size))
        if train:
            output = F.dropout(output, p=dropout_ratio)
    with nn.parameter_scope('output'):
        y = F.sigmoid(PF.affine(output, 1))

    accuracy = F.mean(F.equal(F.round(y), t))
    loss = F.mean(F.binary_cross_entropy(
        y, t)) + attention_penalty_coef * frobenius(
            F.batch_matmul(a, a, transpose_a=True) - batch_eye(batch_size, r))
    return x, t, accuracy, loss
Code example #17
File: spectral_norm.py Project: Pandinosaurus/nnabla
def _spectral_norm_outer_most_dim_backward(dw_sn, w, u, itr=1, eps=1e-12):
    # Forward recomputation

    w_shape = w.shape
    d0 = np.prod(w.shape[0:-1])  # In
    d1 = w.shape[-1]             # Out
    w = F.reshape(w, [d0, d1])
    u = F.reshape(u, [d1, 1])
    # Power method
    for _ in range(itr):
        # v
        v = F.affine(w, u)
        v = v / ((F.sum(v ** 2.0, keepdims=True) + eps) ** 0.5)
        v = F.reshape(v, [1, d0])
        # u
        u = F.affine(v, w)
        u = u / ((F.sum(u ** 2.0, keepdims=True) + eps) ** 0.5)
        u = F.reshape(u, [d1, 1])
    # No grad
    u = no_grad(u)
    v = no_grad(v)
    # Spectral normalization
    vw = F.affine(v, w)
    sigma = F.affine(vw, u)
    w_sn = w / sigma
    # The following process is not necessary for gradient calculation
    # w_sn = F.reshape(w_sn, w_shape)

    # Backward for spectral norm
    dw_sn = dw_sn.reshape(w.shape)
    # Sum for broadcast backward
    S = sum_for_arithmetics(dw_sn * w_sn, sigma)
    # Add batch axis
    S = S.reshape((1,) + S.shape)
    u = u.reshape((1,) + u.shape)
    v = v.reshape((1,) + v.shape)
    m = F.batch_matmul(v, S, transpose_a=True)
    m = F.batch_matmul(m, u, transpose_b=True)
    # Remove batch axis
    m = m.reshape((m.shape[1], m.shape[2]))
    dw = (dw_sn - m) / sigma
    dw = dw.reshape(w_shape)

    return dw, None
Code example #18
File: functions.py Project: satopirka/nlp-nnabla
def attention(query,
              key,
              value,
              mask: Optional[nn.Variable] = None,
              train: bool = True,
              dropout_ratio: float = 0.1,
              fix_parameters=False):
    '''
    A global scaled dot-product attention layer
    Args:
        query (nnabla.Variable): A shape of [B, sen_len_query, units]
        key (nnabla.Variable): A shape of [B, sen_len_memory, units]
        value (nnabla.Variable): A shape of [B, sen_len_memory, units]
        mask (nnabla.Variable): A shape of [B, sen_len_query, sen_len_memory]
        fix_parameters (bool): Fix parameters (Set need_grad=False).
    Returns:
        nn.Variable: A shape [B, sen_len_query, units].
    '''
    batch_size, sentence_length_query, embedding_size = query.shape
    batch_size, sentence_length_memory, embedding_size = key.shape
    q = query
    # -> (batch_size, sentence_length_query, embedding_size)
    k = key
    # -> (batch_size, sentence_length_memory, embedding_size)
    v = value
    # -> (batch_size, sentence_length_memory, embedding_size)

    logit = F.batch_matmul(q, k, transpose_b=True) * (embedding_size**-0.5)
    # -> (batch_size, sentence_length_query, sentence_length_memory)

    # mask shape -> (batch_size, sentence_length_query, sentence_length_memory)
    if mask is not None:
        logit += get_attention_logit_mask(mask)

    attention_weights = F.softmax(logit, axis=2)
    # -> (batch_size, sentence_length_query, sentence_length_memory)

    if train:
        attention_weights = F.dropout(attention_weights, p=dropout_ratio)

    attention_output = F.batch_matmul(attention_weights, v)
    # -> (batch_size, sentence_length_query, embedding_size)

    return attention_output
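A minimal call sketch (inference mode, no mask):

import numpy as np
import nnabla as nn

B, Lq, Lm, E = 2, 5, 7, 16
q = nn.Variable.from_numpy_array(np.random.randn(B, Lq, E).astype(np.float32))
k = nn.Variable.from_numpy_array(np.random.randn(B, Lm, E).astype(np.float32))
v = nn.Variable.from_numpy_array(np.random.randn(B, Lm, E).astype(np.float32))
out = attention(q, k, v, train=False)
print(out.shape)  # (2, 5, 16)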
Code example #19
def equivariance_jacobian_loss(kp_driving_jacobian, arithmetic_jacobian,
                               trans_kp_jacobian, weight):
    jacobian_transformed = F.batch_matmul(arithmetic_jacobian,
                                          trans_kp_jacobian)

    normed_driving = F.reshape(
        F.batch_inv(
            F.reshape(kp_driving_jacobian,
                      (-1, ) + kp_driving_jacobian.shape[-2:])),
        kp_driving_jacobian.shape)

    normed_transformed = jacobian_transformed
    value = F.batch_matmul(normed_driving, normed_transformed)

    eye = nn.Variable.from_numpy_array(np.reshape(np.eye(2), (1, 1, 2, 2)))

    jacobian_loss = F.mean(F.absolute_error(eye, value))
    loss = weight * jacobian_loss
    return loss
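A call sketch with hypothetical (B, K, 2, 2) Jacobians; passing identity matrices as the arithmetic Jacobian and reusing the driving Jacobian as the transformed one makes inv(J) @ (I @ J) = I, so the loss is near zero:

import numpy as np
import nnabla as nn

B, K = 2, 10
J = (0.1 * np.random.randn(B, K, 2, 2) + np.eye(2)).astype(np.float32)
I = np.tile(np.eye(2, dtype=np.float32), (B, K, 1, 1))
loss = equivariance_jacobian_loss(nn.Variable.from_numpy_array(J),
                                  nn.Variable.from_numpy_array(I),
                                  nn.Variable.from_numpy_array(J),
                                  weight=10.0)
loss.forward()
print(loss.d)  # ~0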
Code example #20
    def compute_mel(self, wave):
        hp = self.hparams
        reals, imags = F.stft(wave,
                              window_size=hp.win_length,
                              stride=hp.hop_length,
                              fft_size=hp.n_fft)
        linear = F.pow_scalar(
            F.add2(F.pow_scalar(reals, 2), F.pow_scalar(imags, 2)), 0.5)
        mels = F.batch_matmul(self.basis, linear)
        mels = F.log(F.clip_by_value(mels, 1e-5,
                                     np.inf)).apply(need_grad=False)
        return mels
Code example #21
def create_sparse_motions(source_image, kp_driving, kp_source, num_kp):
    bs, _, h, w = source_image.shape
    identity_grid = make_coordinate_grid((h, w))
    identity_grid = F.reshape(identity_grid,
                              (1, 1, h, w, 2))  # (1, 1, h, w, 2)
    coordinate_grid = identity_grid - \
        F.reshape(kp_driving['value'], (bs, num_kp, 1, 1, 2), inplace=False)

    if 'jacobian' in kp_driving:
        jacobian = F.batch_matmul(
            kp_source['jacobian'],
            F.reshape(
                F.batch_inv(
                    F.reshape(kp_driving['jacobian'],
                              (-1, ) + kp_driving['jacobian'].shape[-2:],
                              inplace=False)), kp_driving['jacobian'].shape))
        # The nested reshape/batch_inv above is equivalent to:
        # batched_driving_jacobian = F.reshape(kp_driving['jacobian'], (-1,) + kp_driving['jacobian'].shape[-2:])
        # batched_inverse_jacobian = F.batch_inv(batched_driving_jacobian)
        # inverse_jacobian = F.reshape(batched_inverse_jacobian, kp_driving['jacobian'].shape)

        jacobian = F.reshape(
            jacobian, jacobian.shape[:-2] + (1, 1) + jacobian.shape[-2:])
        jacobian = F.broadcast(
            jacobian, jacobian.shape[:2] + (h, w) + jacobian.shape[-2:])

        coordinate_grid = F.batch_matmul(
            jacobian, F.reshape(coordinate_grid,
                                coordinate_grid.shape + (1, )))
        coordinate_grid = F.reshape(coordinate_grid,
                                    coordinate_grid.shape[:-1])

    driving_to_source = coordinate_grid + \
        F.reshape(kp_source['value'], (bs, num_kp, 1, 1, 2), inplace=False)

    # background feature
    identity_grid = F.broadcast(identity_grid, (bs, 1, h, w, 2))

    sparse_motions = F.concatenate(identity_grid, driving_to_source, axis=1)
    return sparse_motions
Code example #22
File: model.py Project: sony/nnabla-examples
def encode_text(text):
    param_dict = nn.get_parameters()

    embed_dim = param_dict['text_projection'].shape[1]
    context_length = param_dict['positional_embedding'].shape[0]
    vocab_size = param_dict['token_embedding/W'].shape[0]
    transformer_width = param_dict['ln_final/W'].shape[0]
    transformer_heads = transformer_width // 64
    transformer_layers = len(
        set(
            k.split('/')[2] for k in param_dict.keys()
            if k.startswith('transformer/resblocks')))

    token_embedding = nn.parameter.get_parameter_or_create(
        name='token_embedding/W', shape=(vocab_size, transformer_width))
    x = F.embed(text, token_embedding)  # [batch_size, n_ctx, d_model]

    positional_embedding = nn.parameter.get_parameter_or_create(
        name='positional_embedding',
        shape=(context_length, transformer_width)).reshape(
            (1, context_length, transformer_width))
    x = x + positional_embedding

    x = F.transpose(x, (1, 0, 2))  # NLD -> LND

    x = transformer(x,
                    transformer_width,
                    transformer_layers,
                    transformer_heads,
                    attn_mask=build_attn_mask(context_length))

    x = F.transpose(x, (1, 0, 2))  # LND -> NLD

    ln_final_W = nn.parameter.get_parameter_or_create(
        name='ln_final/W', shape=(transformer_width, )).reshape(
            (1, 1, transformer_width))
    ln_final_b = nn.parameter.get_parameter_or_create(
        name='ln_final/b', shape=(transformer_width, )).reshape(
            (1, 1, transformer_width))
    x = F.layer_normalization(x, ln_final_b, ln_final_W, batch_axis=(0, 1))

    idx = F.max(text, axis=-1, only_index=True)
    idx.forward()
    x = x[list(range(x.shape[0])), idx.d].reshape((1, x.shape[0], -1))
    text_projection = nn.parameter.get_parameter_or_create(
        name='text_projection', shape=(transformer_width, embed_dim)).reshape(
            (1, transformer_width, embed_dim))
    x = F.batch_matmul(x, text_projection)

    x = x.reshape((-1, embed_dim))

    return x
Code example #23
    def unit_sphere_intersection(self, camloc, raydir):
        BR, _ = raydir.shape
        a = 1.0  # raydir is already normalized
        b = 2.0 * F.batch_matmul(F.reshape(camloc, (BR, 1, 3)),
                                 F.reshape(raydir, (BR, 3, 1)))
        c = F.batch_matmul(F.reshape(camloc, (BR, 1, 3)),
                           F.reshape(camloc, (BR, 3, 1))) - 1.0
        D = b**2 - 4 * a * c
        mask = F.reshape(F.greater_scalar(D, 0.0), (BR, 1))

        b = F.reshape(b, (BR, 1))
        D = F.reshape(D, (BR, 1))

        D = mask * D
        D_sqrt = D**0.5
        t_start = -(b + D_sqrt) / (2 * a)
        t_finish = -(b - D_sqrt) / (2 * a)

        t_start = t_start * mask + self.t_near * (1 - mask)
        t_finish = t_finish * mask + self.t_far * (1 - mask)

        return t_start, t_finish, mask
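The quadratic comes from ||camloc + t*raydir||^2 = 1 with a unit raydir: t^2 + 2(camloc . raydir) t + (camloc . camloc - 1) = 0. A NumPy check of the roots for one hand-picked ray:

import numpy as np

o = np.array([0.0, 0.0, 2.0])   # camera location
d = np.array([0.0, 0.0, -1.0])  # unit ray direction, looking at the origin
b = 2.0 * o.dot(d)
c = o.dot(o) - 1.0
D = b ** 2 - 4 * c
t_start, t_finish = (-b - np.sqrt(D)) / 2, (-b + np.sqrt(D)) / 2
print(t_start, t_finish)  # 1.0 3.0: entry and exit of the unit sphere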
Code example #24
def compute_mel(wave, basis, hp):
    r"""Compute the mel-spectrogram from the waveform.

    Args:
        wave (nn.Variable): Waveform variable of shape (B, 1, L).
        basis (nn.Variable): Basis for mel-spectrogram computation.
        hp (HParams): Hyper-parameters.

    Returns:
        nn.Variable: Output variable.
    """
    reals, imags = stft(wave,
                        window_size=hp.win_length,
                        stride=hp.hop_length,
                        fft_size=hp.n_fft)
    linear = (reals**2 + imags**2)**0.5
    mels = F.batch_matmul(basis, linear)
    mels = F.log(F.clip_by_value(mels, 1e-5, np.inf))

    return mels
Code example #25
File: network.py Project: sony/nnabla-examples
def sample_network(x_curr, sdf_cur, raydir, grad_curr):
    """
    x_curr: Points (B, R, 3) either on surface or not
    sdf_cur: SDF on x_curr (B, R, 1)
    raydir: Ray direction (B, R, 3)
    grad_curr: Gradients on x_curr (B, R, 3)
    """

    # Denominator
    de = F.batch_matmul(grad_curr[..., np.newaxis, :],
                        raydir[..., np.newaxis, :],
                        transpose_b=True)
    de = de.reshape(sdf_cur.shape)
    de_inv = (1.0 / de).apply(need_grad=False)
    de_inv = F.minimum_scalar(de_inv, 1e30).apply(
        need_grad=False)  # (numerical issue de = cos(x, y) = 0)
    # Differentiable intersection point (discrete update of implicit differentiation)
    sdf_cur0 = sdf_cur.get_unlinked_variable(need_grad=False)
    x_hat = x_curr - (sdf_cur - sdf_cur0) * de_inv * raydir
    return x_hat
Code example #26
File: audio.py Project: sony/ai-research-code
def log_mel_spectrogram(wave, sr, window_size, n_mels=80):
    """Return log mel-spectrogram.

    Args:
        wave (nn.Variable): Input waveform of shape (B, 1, L).
        sr (int): Sampling rate.
        window_size (int): Window size.
        n_mels (int): Number of mel banks.

    Returns:
        nn.Variable: Log mel-spectrogram.
    """
    linear = spectrogram(wave, window_size)
    mel_basis = librosa_mel_fn(sr,
                               window_size,
                               n_mels=n_mels,
                               fmin=80.0,
                               fmax=7600.0)
    basis = nn.Variable.from_numpy_array(mel_basis[None, ...])
    mels = F.batch_matmul(basis, linear)
    return F.log(mels * 1e4 + 1.0)
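Shape-wise, the basis of shape (1, n_mels, n_freq) multiplies the linear spectrogram of shape (B, n_freq, T) to give mels of shape (B, n_mels, T); a NumPy sketch of just that product (sizes are hypothetical, and NumPy's @ broadcasts the leading batch axis the same way this batch_matmul call relies on):

import numpy as np

B, n_mels, n_freq, T = 2, 80, 513, 100
basis = np.random.rand(1, n_mels, n_freq)
linear = np.random.rand(B, n_freq, T)
mels = basis @ linear
print(np.log(mels * 1e4 + 1.0).shape)  # (2, 80, 100)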
Code example #27
    def backward_impl(self, inputs, outputs, prop_down, accum):
        # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
        # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

        shape_a = inputs[0].shape
        shape_b = inputs[1].shape
        if shape_a[:-2] != shape_b[:-2]:
            raise ValueError(
                "shape_a[:-2] ({}) != shape_b[:-2] ({}). "
                "Implicit broadcast is not supported.".format(
                    shape_a[:-2], shape_b[:-2]))

        # Args
        transpose_a = self.forward_func.info.args["transpose_a"]
        transpose_b = self.forward_func.info.args["transpose_b"]
        # Inputs
        x0 = inputs[0].data
        x1 = inputs[1].data
        dy = inputs[2].data
        # Outputs
        dx0 = outputs[0].data
        dx1 = outputs[1].data
        # Grads of inputs
        g_x0 = inputs[0].grad
        g_x1 = inputs[1].grad
        g_dy = inputs[2].grad
        # Grads of outputs
        g_dx0 = outputs[0].grad
        g_dx1 = outputs[1].grad

        # Computation
        if prop_down[0]:
            # condition
            if (transpose_a, transpose_b) == (True, True):
                g_x0_ = F.batch_matmul(g_dx1, dy, True, True)
            if (transpose_a, transpose_b) == (True, False):
                g_x0_ = F.batch_matmul(g_dx1, dy, False, True)
            if (transpose_a, transpose_b) == (False, True):
                g_x0_ = F.batch_matmul(dy, g_dx1, False, False)
            if (transpose_a, transpose_b) == (False, False):
                g_x0_ = F.batch_matmul(dy, g_dx1, False, True)
            # reshape for batch axes
            if g_x0_.shape != g_x0.shape:
                g_x0_ = F.reshape(g_x0_, g_x0.shape)
            if accum[0]:
                g_x0 += g_x0_
            else:
                g_x0.copy_from(g_x0_)
        if prop_down[1]:
            # condition
            if (transpose_a, transpose_b) == (True, True):
                g_x1_ = F.batch_matmul(dy, g_dx0, True, True)
            if (transpose_a, transpose_b) == (True, False):
                g_x1_ = F.batch_matmul(g_dx0, dy, False, False)
            if (transpose_a, transpose_b) == (False, True):
                g_x1_ = F.batch_matmul(dy, g_dx0, True, False)
            if (transpose_a, transpose_b) == (False, False):
                g_x1_ = F.batch_matmul(g_dx0, dy, True, False)
            # reshape for batch axes
            if g_x1_.shape != g_x1.shape:
                g_x1_ = F.reshape(g_x1_, g_x1.shape)
            if accum[1]:
                g_x1 += g_x1_
            else:
                g_x1.copy_from(g_x1_)
        if prop_down[2]:
            t1 = F.batch_matmul(g_dx0, x1, transpose_a, transpose_b)
            t2 = F.batch_matmul(x0, g_dx1, transpose_a, transpose_b)
            g_dy_ = t1 + t2
            if accum[2]:
                g_dy += g_dy_
            else:
                g_dy.copy_from(g_dy_)
Code example #28
                                       with_file_cache=False)

x = nn.Variable([batch_size, window_size * 2])
with nn.parameter_scope('W_in'):
    h = PF.embed(x, vocab_size, embedding_size)
h = F.mean(h, axis=1)
h = expand_dims(h, axis=-1)  # (batch_size, embedding_size, 1)
t = nn.Variable([batch_size, 1])
t_neg = nn.Variable([batch_size, k])
with nn.parameter_scope('W_out'):
    _t = PF.embed(t, vocab_size,
                  embedding_size)  # (batch_size, 1, embedding_size)
    _t_neg = PF.embed(t_neg, vocab_size,
                      embedding_size)  # (batch_size, k, embedding_size)

t_score = F.sigmoid(F.reshape(F.batch_matmul(_t, h), shape=(batch_size, 1)))
t_neg_score = F.sigmoid(
    F.reshape(F.batch_matmul(_t_neg, h), shape=(batch_size, k)))

t_loss = F.binary_cross_entropy(t_score, F.constant(1, shape=(batch_size, 1)))
t_neg_loss = F.binary_cross_entropy(t_neg_score,
                                    F.constant(0, shape=(batch_size, k)))

loss = F.mean(F.sum(t_loss, axis=1) + F.sum(t_neg_loss, axis=1))

# Create solver.
solver = S.Adam()
solver.set_parameters(nn.get_parameters())

trainer = Trainer(inputs=[x, t, t_neg], loss=loss, solver=solver)
trainer.run(train_data_iter, valid_data_iter, epochs=max_epoch)
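The two scores are per-sample dot products: _t of shape (B, 1, E) times h of shape (B, E, 1) gives (B, 1, 1), reshaped to (B, 1), and likewise (B, k, E) x (B, E, 1) -> (B, k). A NumPy sketch of just the shapes:

import numpy as np

B, E, k = 4, 8, 5
h = np.random.randn(B, E, 1)
t_emb = np.random.randn(B, 1, E)
t_neg_emb = np.random.randn(B, k, E)
print((t_emb @ h).reshape(B, 1).shape)      # (4, 1)
print((t_neg_emb @ h).reshape(B, k).shape)  # (4, 5)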
Code example #29
def cond_att_lstm(x,
                  parent_index,
                  mask,
                  context,
                  context_mask,
                  state_size,
                  att_hidden_size,
                  initial_state=None,
                  initial_cell=None,
                  hist=None,
                  dropout=0,
                  train=True,
                  w_init=None,
                  inner_w_init=None,
                  b_init=I.ConstantInitializer(0),
                  forget_bias_init=I.ConstantInitializer(1)):
    """
    x: (batch_size, length, input_size)
    parent_index: (batch_size, length)
    mask: (batch_size, length)
    context: (batch_size, context_length, context_size)
    context_mask: (batch_size, context_length)
    hist: (batch_size, l, state_size)
    """
    batch_size, length, input_size = x.shape
    _, context_length, context_size = context.shape

    if w_init is None:
        w_init = I.UniformInitializer(
            I.calc_uniform_lim_glorot(input_size, state_size))
    if inner_w_init is None:
        inner_w_init = orthogonal

    retain_prob = 1.0 - dropout
    z_w = nn.Variable((batch_size, 4, input_size), need_grad=False)
    z_w.d = 1
    z_u = nn.Variable((batch_size, 4, state_size), need_grad=False)
    z_u.d = 1

    if dropout > 0:
        if train:
            z_w = F.dropout(z_w, p=retain_prob)
            z_u = F.dropout(z_u, p=retain_prob)
        z_w *= retain_prob
        z_u *= retain_prob

    z_w = F.reshape(z_w, (batch_size, 4, 1, input_size))
    z_w = F.broadcast(z_w, (batch_size, 4, length, input_size))
    z_w = F.split(z_w, axis=1)
    z_u = F.split(z_u, axis=1)
    xi = z_w[0] * x
    xf = z_w[1] * x
    xc = z_w[2] * x
    xo = z_w[3] * x

    with nn.parameter_scope("cond_att_lstm"):
        # (batch_size, length, state_size)
        with nn.parameter_scope("lstm"):
            xi = PF.affine(
                xi,
                state_size,
                base_axis=2,
                w_init=w_init,
                b_init=b_init,
                name="Wi")
            xf = PF.affine(
                xf,
                state_size,
                base_axis=2,
                w_init=w_init,
                b_init=forget_bias_init,
                name="Wf")
            xc = PF.affine(
                xc,
                state_size,
                base_axis=2,
                w_init=w_init,
                b_init=b_init,
                name="Wc")
            xo = PF.affine(
                xo,
                state_size,
                base_axis=2,
                w_init=w_init,
                b_init=b_init,
                name="Wo")

        with nn.parameter_scope("context"):
            # context_att_trans: (batch_size, context_length, att_hidden_size)
            context_att_trans = PF.affine(
                context,
                att_hidden_size,
                base_axis=2,
                w_init=w_init,
                b_init=b_init,
                name="layer1_c")

    if initial_state is None:
        h = nn.Variable((batch_size, state_size), need_grad=False)
        h.data.zero()
    else:
        h = initial_state

    if initial_cell is None:
        c = nn.Variable((batch_size, state_size), need_grad=False)
        c.data.zero()
    else:
        c = initial_cell

    if hist is None:
        hist = nn.Variable((batch_size, 1, state_size), need_grad=False)
        hist.data.zero()

    # (batch_size, state_size)
    xi = split(xi, axis=1)
    xf = split(xf, axis=1)
    xc = split(xc, axis=1)
    xo = split(xo, axis=1)
    mask = F.reshape(mask, [batch_size, length, 1])  # (batch_size, length, 1)
    mask = F.broadcast(mask, [batch_size, length, state_size])
    # (batch_size, state_size)
    mask = split(mask, axis=1)
    # (batch_size, max_action_length)
    parent_index = parent_index + 1  # index == 0 means that parent is root
    # (batch_size)
    parent_index = split(parent_index, axis=1)

    hs = []
    cs = []
    ctx = []

    for i, f, c2, o, m, p in zip(xi, xf, xc, xo, mask, parent_index):
        h_num = hist.shape[1]
        with nn.parameter_scope("context"):
            h_att_trans = PF.affine(
                h,
                att_hidden_size,
                with_bias=False,
                w_init=w_init,
                name="layer1_h")  # (batch_size, att_hidden_size)
            h_att_trans = F.reshape(h_att_trans,
                                    (batch_size, 1, att_hidden_size))
            h_att_trans = F.broadcast(
                h_att_trans, (batch_size, context_length, att_hidden_size))
            att_hidden = F.tanh(context_att_trans + h_att_trans)
            att_raw = PF.affine(
                att_hidden, 1, base_axis=2, w_init=w_init,
                b_init=b_init)  # (batch_size, context_length, 1)
            att_raw = F.reshape(att_raw, (batch_size, context_length))
            ctx_att = F.exp(att_raw - F.max(att_raw, axis=1, keepdims=True))
            ctx_att = ctx_att * context_mask
            ctx_att = ctx_att / F.sum(ctx_att, axis=1, keepdims=True)
            ctx_att = F.reshape(ctx_att, (batch_size, context_length, 1))
            ctx_att = F.broadcast(ctx_att,
                                  (batch_size, context_length, context_size))
            ctx_vec = F.sum(
                context * ctx_att, axis=1)  # (batch_size, context_size)

        # parent_history
        p = F.reshape(p, (batch_size, 1))
        p = F.one_hot(p, (h_num, ))
        p = F.reshape(p, (batch_size, 1, h_num))
        par_h = F.batch_matmul(p, hist)  # [batch_size, 1, state_size]
        par_h = F.reshape(par_h, (batch_size, state_size))

        with nn.parameter_scope("lstm"):
            i_t = PF.affine(
                z_u[0] * h,
                state_size,
                w_init=inner_w_init(state_size, state_size),
                with_bias=False,
                name="Ui")
            i_t += PF.affine(
                ctx_vec,
                state_size,
                w_init=inner_w_init(context_size, state_size),
                with_bias=False,
                name="Ci")
            i_t += PF.affine(
                par_h,
                state_size,
                w_init=inner_w_init(state_size, state_size),
                with_bias=False,
                name="Pi")
            i_t = F.sigmoid(i + i_t)
            f_t = PF.affine(
                z_u[1] * h,
                state_size,
                w_init=inner_w_init(state_size, state_size),
                with_bias=False,
                name="Uf")
            f_t += PF.affine(
                ctx_vec,
                state_size,
                w_init=inner_w_init(context_size, state_size),
                with_bias=False,
                name="Cf")
            f_t += PF.affine(
                par_h,
                state_size,
                w_init=inner_w_init(state_size, state_size),
                with_bias=False,
                name="Pf")
            f_t = F.sigmoid(f + f_t)
            c_t = PF.affine(
                z_u[2] * h,
                state_size,
                w_init=inner_w_init(state_size, state_size),
                with_bias=False,
                name="Uc")
            c_t += PF.affine(
                ctx_vec,
                state_size,
                w_init=inner_w_init(context_size, state_size),
                with_bias=False,
                name="Cc")
            c_t += PF.affine(
                par_h,
                state_size,
                w_init=inner_w_init(state_size, state_size),
                with_bias=False,
                name="Pc")
            c_t = f_t * c + i_t * F.tanh(c2 + c_t)
            o_t = PF.affine(
                z_u[3] * h,
                state_size,
                w_init=inner_w_init(state_size, state_size),
                with_bias=False,
                name="Uo")
            o_t += PF.affine(
                ctx_vec,
                state_size,
                w_init=inner_w_init(context_size, state_size),
                with_bias=False,
                name="Co")
            o_t += PF.affine(
                par_h,
                state_size,
                w_init=inner_w_init(state_size, state_size),
                with_bias=False,
                name="Po")
            o_t = F.sigmoid(o + o_t)
            h_t = o_t * F.tanh(c_t)

            h_t = (1 - m) * h + m * h_t
            c_t = (1 - m) * c + m * c_t
            h = h_t
            c = c_t
            h_t = F.reshape(h_t, (batch_size, 1, state_size), inplace=False)
            c_t = F.reshape(c_t, (batch_size, 1, state_size), inplace=False)
            ctx_vec = F.reshape(
                ctx_vec, (batch_size, 1, context_size), inplace=False)
            hs.append(h_t)
            cs.append(c_t)
            ctx.append(ctx_vec)

            hist = F.concatenate(
                hist, h_t, axis=1)  # (batch_size, h_num + 1, state_size)

    return concatenate(
        *hs, axis=1), concatenate(
            *cs, axis=1), concatenate(
                *ctx, axis=1), hist
Code example #30
File: spectral_norm.py Project: Pandinosaurus/nnabla
def _spectral_norm_backward(dw_sn, w, u, dim=0, itr=1, eps=1e-12):
    # Forward recomputation

    w_shape = w.shape
    # Transpose if the output dimension is not the most-left dimension.
    if dim != 0:
        dims_transpose = [dim] + [i for i in range(len(w_shape)) if i != dim]
        w = F.transpose(w, dims_transpose)
        w_shape = w.shape
    d0 = w.shape[0]            # Out
    d1 = np.prod(w.shape[1:])  # In
    w = F.reshape(w, [d0, d1])
    u = F.reshape(u, [1, d0])
    # Power method
    for _ in range(itr):
        # v
        v = F.affine(u, w)
        v = v / ((F.sum(v ** 2.0, keepdims=True) + eps) ** 0.5)
        v = F.reshape(v, [d1, 1])
        # u
        u = F.affine(w, v)
        u = u / ((F.sum(u ** 2.0, keepdims=True) + eps) ** 0.5)
        u = F.reshape(u, [1, d0])
    # No grad
    u = no_grad(u)
    v = no_grad(v)
    # Spectral normalization
    wv = F.affine(w, v)
    sigma = F.affine(u, wv)
    w_sn = w / sigma
    # The following process is not necessary for gradient calculation
    # w_sn = F.reshape(w_sn, w_shape)
    # # Transpose again if the output dimension is not the most-left dimension.
    # if dim != 0:
    #     dims_transpose = [i for i in range(1, dim + 1)] \
    #                      + [0] + [i for i in range(dim + 1, len(w_shape))]
    #     w_sn = F.transpose(w_sn, dims_transpose)

    # Backward

    # Backward for post-transpose
    if dim != 0:
        dims_transpose = [dim] + [i for i in range(len(w_shape)) if i != dim]
        dw_sn = F.transpose(dw_sn, dims_transpose)
    dw_sn = dw_sn.reshape(w.shape)

    # Backward for spectral norm
    # Sum for broadcast backward
    S = sum_for_arithmetics(dw_sn * w_sn, sigma)
    # Add batch axis
    S = S.reshape((1,) + S.shape)
    u = u.reshape((1,) + u.shape)
    v = v.reshape((1,) + v.shape)
    m = F.batch_matmul(u, S, transpose_a=True)
    m = F.batch_matmul(m, v, transpose_b=True)
    # Remove batch axis
    m = m.reshape((m.shape[1], m.shape[2]))
    dw = (dw_sn - m) / sigma

    # Backward for pre-transpose
    dw = dw.reshape(w_shape)
    if dim != 0:
        dims_transpose = [i for i in range(1, dim + 1)] \
                         + [0] + [i for i in range(dim + 1, len(w_shape))]
        dw = F.transpose(dw, dims_transpose)

    return dw, None
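The power-method loop approximates the largest singular value of w; a NumPy sketch (dimensions are arbitrary) showing that sigma converges to the top singular value:

import numpy as np

rng = np.random.default_rng(0)
w = rng.standard_normal((8, 6))
u = rng.standard_normal((1, 8))
for _ in range(100):
    v = u @ w                              # v <- u w, as in F.affine(u, w)
    v = v / np.sqrt((v ** 2).sum() + 1e-12)
    u = v @ w.T                            # u <- w v, as in F.affine(w, v)
    u = u / np.sqrt((u ** 2).sum() + 1e-12)
sigma = (u @ w @ v.T).item()
print(sigma, np.linalg.svd(w, compute_uv=False)[0])  # both ~ the top singular value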