Example #1
def dot_attention(query, key, value, mask, dropout=0.0):
    # query: (batch_size, h, length_q, model_dim/h)
    # key:   (batch_size, h, length_k, model_dim/h)
    # value: (batch_size, h, length_k, model_dim/h)

    query_shape = query.shape
    query = query.reshape(-3, -2)
    key = key.reshape(-3, -2)
    value = value.reshape(-3, -2)

    # matmul, t: (batch_size*h, length_q, length_k)
    t = nd.batch_dot(query, key.swapaxes(1, 2)) / math.sqrt(query.shape[-1])

    # masked
    # mask PAD and future words
    m = nd.full(t.shape, LARGE_NEGATIVE_VALUE)
    mask = nd.ones(t.shape) * mask
    t = nd.where(mask, t, m)

    # softmax
    t = nd.softmax(t, axis=-1)
    if dropout > 0.0:
        t = nd.Dropout(t, p=dropout)

    # (batch_size, h, length_q, model_dim/h)
    return nd.batch_dot(t, value).reshape(query_shape)
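
As a quick sanity check of the scaled dot-product pattern above, here is a minimal, self-contained shape sketch; the tensor sizes are made up for illustration and only mxnet itself is assumed.

import math
from mxnet import nd

batch_size, h, length_q, length_k, d = 2, 4, 5, 6, 8
q = nd.random.uniform(shape=(batch_size * h, length_q, d))
k = nd.random.uniform(shape=(batch_size * h, length_k, d))
v = nd.random.uniform(shape=(batch_size * h, length_k, d))

scores = nd.batch_dot(q, k.swapaxes(1, 2)) / math.sqrt(d)  # (batch*h, length_q, length_k)
weights = nd.softmax(scores, axis=-1)
out = nd.batch_dot(weights, v)                             # (batch*h, length_q, d)
print(out.shape)  # (8, 5, 8)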
Example #2
    def _train_batch(self, batch, gibbs_sampling_steps, learning_rate):
        """Performs k-step Contrastive Divergence (CD-k) learning.
        Updates weights and biases.
        Keep in mind that most variables are "batch" tensors.
        Variable name suffix "_pr" stands for Pr. (probability).
        """
        hidden_pr, hidden, dreamed_visible, dreamed_hidden_pr = self.gibbs_sampling_step(
            batch)

        positive_phase = nd.batch_dot(self._transpose_batch(batch), hidden)
        for _ in range(gibbs_sampling_steps - 1):
            _, _, dreamed_visible, dreamed_hidden_pr = self.gibbs_sampling_step(
                dreamed_visible)
        negative_phase = nd.batch_dot(self._transpose_batch(dreamed_visible),
                                      dreamed_hidden_pr)

        #  make learning rate independent from the batch size
        learning_rate = learning_rate / batch.shape[0]

        self.weights += learning_rate * nd.sum(positive_phase - negative_phase,
                                               axis=(0, ))

        if self.hidden_bias is not None:
            self.hidden_bias += learning_rate * nd.sum(
                hidden_pr - dreamed_hidden_pr, axis=(0, ))
        if self.visible_bias is not None:
            self.visible_bias += learning_rate * nd.sum(
                batch - dreamed_visible, axis=(0, ))
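
For reference, the positive/negative phases above boil down to one outer product per sample via nd.batch_dot; a minimal sketch with made-up sizes (B samples, V visible and H hidden units), using plain random tensors as stand-ins:

from mxnet import nd

B, V, H = 4, 6, 3
visible = nd.random.uniform(shape=(B, V, 1))   # stand-in for a transposed visible batch
hidden = nd.random.uniform(shape=(B, 1, H))    # stand-in for hidden activations
assoc = nd.batch_dot(visible, hidden)          # (B, V, H): one outer product per sample
grad = nd.sum(assoc, axis=0)                   # (V, H): summed over the batch, as in the update
print(grad.shape)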
Example #3
 def forward(self, query, key, value, mask=None):
     d = query.shape[-1]
     scores = nd.batch_dot(query, key, transpose_b=True) / math.sqrt(d)
     attention_weights = nlp.model.attention_cell._masked_softmax(
         nd, scores, mask, scores.dtype)
     attention_weights = self.dropout(attention_weights)
     return nd.batch_dot(attention_weights, value)
Example #4
    def forward(self, input_data):
        freq = input_data[:, 0:2].expand_dims(1)
        input_data = input_data[:, 2:]
        e1_vec_start = FIXED_WORD_LENGTH * DIMENSION
        x = input_data[:, :e1_vec_start].reshape(
            (input_data.shape[0], FIXED_WORD_LENGTH,
             DIMENSION))  # (m, 60, 110)

        e1neimask = input_data[:, e1_vec_start:e1_vec_start +
                               MASK_LENGTH]  # (m, 51)
        e1edge = input_data[:, e1_vec_start + MASK_LENGTH:e1_vec_start +
                            MASK_LENGTH + ENTITY_EDGE_VEC_LENGTH].reshape(
                                (input_data.shape[0], ENTITY_DEGREE,
                                 WORD_DIMENSION * 2))  # (m, 51, 200)
        e1neigh = e1edge[:, :, :WORD_DIMENSION]

        e2_vec_start = e1_vec_start + MASK_LENGTH + ENTITY_EDGE_VEC_LENGTH
        e2neimask = input_data[:, e2_vec_start:e2_vec_start +
                               MASK_LENGTH]  # (m, 51)
        e2edge = input_data[:, e2_vec_start + MASK_LENGTH:e2_vec_start +
                            MASK_LENGTH + ENTITY_EDGE_VEC_LENGTH].reshape(
                                (input_data.shape[0], ENTITY_DEGREE,
                                 WORD_DIMENSION * 2))  # (m, 51,200)
        e2neigh = e2edge[:, :, :WORD_DIMENSION]

        gru = self.gru
        x = nd.transpose(x, axes=(1, 0, 2))
        h = gru(x)
        ht = nd.transpose(h, axes=(1, 0, 2))
        gru_out = self.gru_out
        y1 = gru_out(ht.expand_dims(1))  # (m,200)

        att = self.center_att
        e1edge = nd.tanh(e1edge)
        e1g = att(e1edge) * freq[:, :, :1]  # (m,51,1)
        e1g = e1g * e1neimask.expand_dims(2)
        e1g = nd.softmax(e1g, axis=1)
        e1gt = nd.transpose(e1g, axes=(0, 2, 1))  # (m,1,51)
        e1n = nd.batch_dot(e1gt, e1neigh)  # (m,1,100)
        e1n = e1n.reshape((e1n.shape[0], 100))  # (m,100)

        e2edge = nd.tanh(e2edge)
        e2g = att(e2edge) * freq[:, :, 1:]  # (m,51,1)
        e2g = e2g * e2neimask.expand_dims(2)
        e2g = nd.softmax(e2g, axis=1)
        e2gt = nd.transpose(e2g, axes=(0, 2, 1))  # (m,1,51)
        e2n = nd.batch_dot(e2gt, e2neigh)  # (m,1,100)
        e2n = e2n.reshape((e2n.shape[0], 100))  # (m,100)

        center_y = nd.concat(e1n, e2n, dim=1)  # (m,200)
        center_out = self.center_out
        center_y = center_out(center_y)

        out = self.output
        y4 = nd.concat(y1, center_y, dim=1)
        y5 = out(y4)
        return y5
Example #5
    def forward(self, x):
        """Forward Relation Module.

        Parameters
        ----------
        feat : mxnet.nd.NDArray or mxnet.symbol
            (M, 1024) Feature tensor (used to compute q).
        ctx_feat : mxnet.nd.NDArray or mxnet.symbol
            (N, 1024)Contextual Feature tensor (used to compute k,v).
        box: mxnet.nd.NDArray or mxnet.symbol
            (M, 4) boxes with corner encoding.
        ctx_box: mxnet.nd.NDArray or mxnet.symbol
            (N, 4) boxes with corner encoding.

        Returns
        -------
        gt_relation_feat, ctx_relation_feat
            (M, 1024).
        """
        e = self.dim_k  # e = 1024    (feature size)
        k = v = q = x.shape[0]  # k, v, q = N (number of bounding boxes)
        h = self.num_group  # h = 16 (number of groups, i.e. heads, for multi-head attention)

        x = x.reshape(k, h, e)
        x = x.reshape(k * h, e)
        keys = self.to_keys(x).reshape(k, h, e).transpose(
            axes=(1, 0, 2))  # keys    : (h, k, e)
        values = self.to_values(x).reshape(k, h, e).transpose(
            axes=(1, 0, 2))  # values  : (h, v, e)
        queries = self.to_queries(x).reshape(k, h, e).transpose(
            axes=(1, 0, 2))  # queries : (h, q, e)

        keys = keys / (self.num_feat**(1 / 4))
        queries = queries / (self.num_feat**(1 / 4))
        dot = F.batch_dot(lhs=queries,
                          rhs=keys,
                          transpose_a=False,
                          transpose_b=True)  # dot : (h, q, k)

        attention = F.softmax(dot, axis=2)

        out = F.batch_dot(lhs=attention,
                          rhs=values,
                          transpose_a=False,
                          transpose_b=False)  # out : (h, q, e)
        out = out.transpose(axes=(1, 0, 2))  # out : (q, h, e)
        out = out.reshape(q, -1)  # out : (q, h*e)

        out = self.unify_heads(out)  # out : (q, e)
        return out
Example #6
 def _get_co_attention(as_, bs_, r, lamb=k_lambda):
     """
     as_, bs_: (batch_size, seq_len, embed_size)
     r: (batch_size, seq_len, seq_len, 5)
     """
     e = nd.batch_dot(as_, bs_, transpose_b=True) + lamb * F(
         r, ctx)  # (batch_size, seq_len, seq_len)
     alpha = nd.softmax(e, axis=2)  # alpha_ij = exp(e_ij) / SUM_k(exp(e_ik))
     beta = nd.softmax(e, axis=1)   # beta_ij = exp(e_ij) / SUM_k(exp(e_kj))
     beta = nd.transpose(beta, axes=[0, 2, 1])  # transpose because of softmax axis=1
     ac = nd.batch_dot(alpha, bs_)
     bc = nd.batch_dot(beta, as_)
     return ac, bc, alpha, beta
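
A shape sketch of the co-attention above with made-up sizes; the relation term lamb * F(r, ctx) is left out here, so this only illustrates the batch_dot/softmax plumbing:

from mxnet import nd

B, L, E = 2, 6, 8
as_ = nd.random.uniform(shape=(B, L, E))
bs_ = nd.random.uniform(shape=(B, L, E))
e = nd.batch_dot(as_, bs_, transpose_b=True)               # (B, L, L)
alpha = nd.softmax(e, axis=2)
beta = nd.transpose(nd.softmax(e, axis=1), axes=(0, 2, 1))
ac = nd.batch_dot(alpha, bs_)                              # bs_ aligned to each position of as_
bc = nd.batch_dot(beta, as_)                               # as_ aligned to each position of bs_
print(ac.shape, bc.shape)                                  # (2, 6, 8) (2, 6, 8)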
Example #7
 def forward(self, query, key, value, valid_length):
     """Forward function"""
     query, key = self.W_k(query), self.W_q(key)
     features = query.expand_dims(axis=2) + key.expand_dims(axis=1)
     scores = self.v(features).squeeze(axis=-1)
     attention_weights = self.dropout(masked_softmax(scores, valid_length))
     return nd.batch_dot(attention_weights, value)
Example #8
    def forward(self, x):
        '''
        Parameters
        ----------
        x: mx.ndarray, shape is (batch_size, N, C_{r-1}, T_{r-1})

        Returns
        ----------
        mx.ndarray, shape is (batch_size, N, num_of_time_filters, T_{r-1})

        '''
        (batch_size, num_of_vertices,
         num_of_features, num_of_timesteps) = x.shape
        # shape is (batch_size, T, T)
        temporal_At = self.TAt(x)

        x_TAt = nd.batch_dot(x.reshape(batch_size, -1, num_of_timesteps),
                             temporal_At)\
                  .reshape(batch_size, num_of_vertices,
                           num_of_features, num_of_timesteps)

        # cheb gcn with spatial attention
        spatial_At = self.SAt(x_TAt)
        spatial_gcn = self.cheb_conv_SAt(x, spatial_At)

        # convolution along time axis
        time_conv_output = (self.time_conv(spatial_gcn.transpose((0, 2, 1, 3)))
                            .transpose((0, 2, 1, 3)))

        # residual shortcut
        x_residual = (self.residual_conv(x.transpose((0, 2, 1, 3)))
                      .transpose((0, 2, 1, 3)))

        return self.ln(nd.relu(x_residual + time_conv_output))
Example #9
 def forward(self, x1, x2):
     y1 = self.mlp(x1)
     y2 = self.mlp(x2)
     # re-shape it
     y1 = y1.expand_dims(axis=1)  # add dummy dimension
     y2 = y2.expand_dims(axis=2)  # Y1: (N, 1, C) Y2: (N, C, 1)
     return nd.batch_dot(y1, y2)
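
The return value above is one scalar score per sample: batch_dot of (N, 1, C) with (N, C, 1) gives (N, 1, 1). A tiny check with assumed sizes:

from mxnet import nd

y1 = nd.random.uniform(shape=(3, 1, 16))
y2 = nd.random.uniform(shape=(3, 16, 1))
print(nd.batch_dot(y1, y2).shape)  # (3, 1, 1)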
Example #10
 def forward(self, query, key, value, valid_length):
     """Forward function"""
     query, key = self.W_k(query), self.W_q(key)
     features = query.expand_dims(axis=2) + key.expand_dims(axis=1)
     scores = self.v(features).squeeze(axis=-1)
     attention_weights = self.dropout(masked_softmax(scores, valid_length))
     return nd.batch_dot(attention_weights, value)
Example #11
    def _calculate_trilinear_similarity(self, context, query, context_max_len,
                                        query_max_len, w4mlu, bias):
        """Implement the computation of trilinear similarity function.

            refer https://github.com/NLPLearn/QANet/blob/master/layers.py#L505

            The similarity function is:
                    f(w, q) = W[w, q, w * q]
            where w and q represent a word in the context and query respectively,
            and the * operator denotes the Hadamard product.

        Parameters
        -----------
        context : NDArray
            input tensor with shape `(batch_size, context_sequence_length, hidden_size)`
        query : NDArray
            input tensor with shape `(batch_size, query_sequence_length, hidden_size)`
        context_max_len : int
        query_max_len : int

        Returns
        --------
        similarity_mat : NDArray
            output tensor with shape `(batch_size, context_sequence_length, query_sequence_length)`
        """

        subres0 = nd.tile(self.w4c(context), [1, 1, query_max_len])
        subres1 = nd.tile(nd.transpose(self.w4q(query), axes=(0, 2, 1)),
                          [1, context_max_len, 1])
        subres2 = nd.batch_dot(w4mlu * context,
                               nd.transpose(query, axes=(0, 2, 1)))
        similarity_mat = subres0 + subres1 + subres2 + bias
        return similarity_mat
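
A shape-only sketch of the trilinear similarity above, with made-up sizes and plain random NDArrays standing in for the outputs of the w4c/w4q projections (an assumption for illustration):

from mxnet import nd

B, Lc, Lq, H = 2, 7, 5, 4
context = nd.random.uniform(shape=(B, Lc, H))
query = nd.random.uniform(shape=(B, Lq, H))
w4mlu = nd.random.uniform(shape=(1, 1, H))
subres0 = nd.tile(nd.random.uniform(shape=(B, Lc, 1)), [1, 1, Lq])  # stand-in for w4c(context)
subres1 = nd.tile(nd.random.uniform(shape=(B, 1, Lq)), [1, Lc, 1])  # stand-in for w4q(query)^T
subres2 = nd.batch_dot(w4mlu * context, nd.transpose(query, axes=(0, 2, 1)))
print((subres0 + subres1 + subres2).shape)  # (2, 7, 5)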
Example #12
def attention(query, key, value, mask=None, dropout=None):
    # Q * K.transpose() * value
    assert (len(query.shape) == 3)
    assert (len(key.shape) == 3)
    assert (len(value.shape) == 3)
    d_model = query.shape[-1]

    scores = nd.batch_dot(query, key, transpose_b=True) / math.sqrt(d_model)

    if mask is not None:
        val = nd.ones(scores.shape, ctx=cfg.ctx) * (-1e9)
        scores = nd.where(mask == 1, scores, val)
    p_attn = nd.softmax(scores, axis=-1)
    if dropout is not None:
        p_attn = dropout(p_attn)
    return nd.batch_dot(p_attn, value), p_attn
Example #13
    def forward(self, cur_input, state, encoder_outputs):
        # When the RNN has multiple hidden layers, take the hidden state of the
        # single layer closest to the output layer.
        single_layer_state = [state[0][-1].expand_dims(0)]

        encoder_outputs = encoder_outputs.reshape((self.max_seq_len, -1,
                                                   self.encoder_num_hiddens))

        hidden_broadcast = nd.broadcast_axis(single_layer_state[0], axis=0,
                                             size=self.max_seq_len)
        encoder_outputs_and_hiddens = nd.concat(encoder_outputs,
                                                hidden_broadcast, dim=2)

        energy = self.attention(encoder_outputs_and_hiddens)

        batch_attention = nd.softmax(energy, axis=0).transpose((1, 2, 0))
        batch_encoder_outputs = encoder_outputs.swapaxes(0, 1)
        decoder_context = nd.batch_dot(batch_attention, batch_encoder_outputs)
        # change here
        input_and_context = nd.concat(nd.expand_dims(self.embedding(cur_input), axis=1),
            decoder_context, dim=2)
        concat_input = self.rnn_concat_input(input_and_context).reshape((1, -1, 0))

        concat_input = self.dropout(concat_input)

        state = [nd.broadcast_axis(single_layer_state[0], axis=0,size=self.num_layers)]

        output, state = self.rnn(concat_input, state)

        output = self.dropout(output)
        output = self.out(output).reshape((-3, -1))
        return output, state
Example #14
def calculate_loss(x, y, model, loss, loss_name, class_weight, penalization_coeff):
    """calculate loss value

    Args:
        x (NDArray): input of model
        y (NDArray): target
        model (Block): model
        loss (gluon.loss): loss function
        loss_name (str): name of loss function
        class_weight (NDArray): weight of sample loss value for each category
        penalization_coeff (float): Attention penalty coefficient

    Returns:
        NDArray: output of model
        NDArray: loss value
    """

    pred, att = model(x)
    if loss_name == 'sce':
        l = loss(pred, y)
    elif loss_name == 'wsce':
        l = loss(pred, y, class_weight, class_weight.shape[0])

    # penalty
    diversity_penalty = nd.batch_dot(att, nd.transpose(att, axes=(0, 2, 1))
                                     ) - nd.eye(att.shape[1], ctx=att.context)
    l = l + penalization_coeff * diversity_penalty.norm(axis=(1, 2))

    return pred, l
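
The diversity penalty above matches the ||A A^T - I|| regularizer used with self-attentive sentence embeddings; a minimal sketch of just that term, with made-up sizes:

from mxnet import nd

att = nd.softmax(nd.random.uniform(shape=(2, 4, 7)), axis=2)  # (batch, hops, seq_len)
penalty = nd.batch_dot(att, nd.transpose(att, axes=(0, 2, 1))) - nd.eye(4)
print(penalty.norm(axis=(1, 2)).shape)  # (2,): one Frobenius norm per sample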
Example #15
    def forward(self, x):

        if self.routing is not None:
            routing_weight = nd.softmax(nd.zeros(shape=(1, 1, self.num_points),
                                                 ctx=x.context),
                                        axis=2)
        trans = self.stn(x)
        x = nd.transpose(x, (0, 2, 1))
        x = nd.batch_dot(x, trans)
        x = nd.transpose(x, (0, 2, 1))
        x = nd.relu(self.bn1(self.conv1(x)))
        pointfeat = x
        x = nd.relu(self.bn2(self.conv2(x)))
        x = self.bn3(self.conv3(x))
        if self.routing is not None:
            s = nd.sum(x * routing_weight, axis=2, keepdims=True)
            # v = Squash(s, axis=1)
            for _ in range(self.routing):
                routing_weight = routing_weight + nd.sum(
                    x * s, axis=1, keepdims=True)
                c = nd.softmax(routing_weight, axis=2)
                s = nd.sum(x * c, axis=2, keepdims=True)
                # v = Squash(s, axis=1)
            x = s
        else:
            x = self.mp1(x)
        if self.global_feat:
            return x, trans
        else:
            x = x.repeat(self.num_points, axis=2)
            return nd.concat(x, pointfeat, dim=1), trans
Example #16
 def matmul(self, x, y, transpose_a=False, transpose_b=False):
     x = nd.split(x, self.embedding_size, 2)
     y = nd.split(y, self.embedding_size, 2)
     res = []
     for idx in range(self.embedding_size):
         array = nd.batch_dot(x[idx], y[idx], transpose_a=transpose_a,
                              transpose_b=transpose_b)
         res.append(array.asnumpy().tolist())
     return nd.array(res, ctx=self.ctx)
Example #17
def augment(points, xforms, r=None):
    points_xformed = nd.batch_dot(points, xforms, name='points_xformed')
    if r is None:
        return points_xformed

    jitter_data = r * mx.random.normal(shape=points_xformed.shape)
    jitter_clipped = nd.clip(jitter_data, -5 * r, 5 * r, name='jitter_clipped')
    return points_xformed + jitter_clipped
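
A hypothetical call to augment() above (it relies on the example's own module-level imports of mxnet as mx and nd); identity transforms leave the points unchanged up to the added jitter:

from mxnet import nd

points = nd.random.uniform(shape=(2, 1024, 3))
xforms = nd.stack(*[nd.eye(3) for _ in range(2)])  # (2, 3, 3) identity transforms
out = augment(points, xforms, r=0.01)
print(out.shape)  # (2, 1024, 3)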
Example #18
    def forward(self, decoder_output, encoder_output):
        """TODO: Docstring for forward.

        :decoder_output: TODO
        :encoder_output: TODO
        :returns: TODO

        """

        decoder_output = decoder_output.transpose([0, 2, 1])

        score = nd.batch_dot(encoder_output, decoder_output)

        weight = nd.softmax(score, axis=1)

        context = nd.batch_dot(nd.transpose(weight, [0, 2, 1]), encoder_output)

        return context, nd.squeeze(weight)
Example #19
def bilinear(x,
             W,
             y,
             input_size,
             seq_len,
             batch_size,
             num_outputs=1,
             bias_x=False,
             bias_y=False):
    """Do xWy

    Parameters
    ----------
    x : NDArray
        (input_size x seq_len) x batch_size
    W : NDArray
        (num_outputs x ny) x nx
    y : NDArray
        (input_size x seq_len) x batch_size
    input_size : int
        input dimension
    seq_len : int
        sequence length
    batch_size : int
        batch size
    num_outputs : int
        number of outputs
    bias_x : bool
        whether concat bias vector to input x
    bias_y : bool
        whether concat bias vector to input y

    Returns
    -------
    output : NDArray
        [seq_len_y x seq_len_x if output_size == 1 else seq_len_y x num_outputs x seq_len_x]
        x batch_size
    """
    if bias_x:
        x = nd.concat(x, nd.ones((1, seq_len, batch_size)), dim=0)
    if bias_y:
        y = nd.concat(y, nd.ones((1, seq_len, batch_size)), dim=0)

    ny = input_size + bias_y
    # W: (num_outputs x ny) x nx
    lin = nd.dot(W, x)
    if num_outputs > 1:
        lin = reshape_fortran(lin, (ny, num_outputs * seq_len, batch_size))
    y = y.transpose([2, 1, 0])  # May cause performance issues
    lin = lin.transpose([2, 1, 0])
    blin = nd.batch_dot(lin, y, transpose_b=True)
    blin = blin.transpose([2, 1, 0])
    if num_outputs > 1:
        blin = reshape_fortran(blin,
                               (seq_len, num_outputs, seq_len, batch_size))
    return blin
Example #20
    def forward(self, x, spatial_attention, cheb_polynomials):
        '''
        Chebyshev graph convolution operation

        Parameters
        ----------
        x: mx.ndarray, graph signal matrix
           shape is (batch_size, N, F, T_{r-1}), F is the num of features

        spatial_attention: mx.ndarray, shape is (batch_size, N, N)
                           spatial attention scores

        cheb_polynomials: list of mx.ndarray
                          Chebyshev polynomial terms, each of shape (N, N)

        Returns
        ----------
        mx.ndarray, shape is (batch_size, N, self.num_of_filters, T_{r-1})

        '''
        (batch_size, num_of_vertices,
         num_of_features, num_of_timesteps) = x.shape

        self.Theta.shape = (self.K, num_of_features, self.num_of_filters)
        self.Theta._finish_deferred_init()

        cur_context = x.context

        outputs = []
        for time_step in range(num_of_timesteps):
            # shape is (batch_size, V, F)
            graph_signal = x[:, :, :, time_step]
            output = nd.zeros(shape=(batch_size, num_of_vertices,
                                     self.num_of_filters), ctx=x.context)
            for k in range(self.K):

                # shape of T_k is (V, V)
                T_k = cheb_polynomials[k].tostype('default').as_in_context(cur_context)
                # print("T_K: ", T_k)

                # shape of T_k_with_at is (batch_size, V, V)
                T_k_with_at = T_k * spatial_attention
                # T_k_with_at = T_k.as_in_context(cur_context) * spatial_attention

                # print("T_k_with_at: ", T_k_with_at)
                # shape of theta_k is (F, num_of_filters)
                theta_k = self.Theta.data(cur_context)[k]

                # shape is (batch_size, V, F)
                # rhs = nd.batch_dot(T_k_with_at.transpose((0, 2, 1)).tostype('csr'),
                #                    graph_signal)
                # print("T_k_with_at: ", T_k_with_at)
                # print("graph signal: ", graph_signal)
                rhs = nd.batch_dot(T_k_with_at, graph_signal)
                # print("rhs: ", rhs)
                # print("theta_k: ", theta_k)
                output = output + nd.dot(rhs, theta_k)
            outputs.append(output.expand_dims(-1))
        return nd.relu(nd.concat(*outputs, dim=-1))
Example #21
    def forward(self, x):
        '''
        Parameters
        ----------
        x: mx.ndarray, x^{(r - 1)}_h
                       shape is (batch_size, N, C_{r-1}, T_{r-1})

        Returns
        ----------
        E_normalized: mx.ndarray, E', temporal attention scores
                      shape is (batch_size, T_{r-1}, T_{r-1})

        '''
        _, num_of_vertices, num_of_features, num_of_timesteps = x.shape

        # defer shape
        self.U_1.shape = (num_of_vertices, )
        self.U_2.shape = (num_of_features, num_of_vertices)
        self.U_3.shape = (num_of_features, )
        self.b_e.shape = (1, num_of_timesteps, num_of_timesteps)
        self.V_e.shape = (num_of_timesteps, num_of_timesteps)
        for param in [self.U_1, self.U_2, self.U_3, self.b_e, self.V_e]:
            param._finish_deferred_init()

        # print(x)
        # print(self.U_1.data())
        # print(self.U_2.data())
        # print("=========================")
        # print("Context of Variables")
        # print("x::{0}, D1::{1}, D2::{2}".format(x.context, self.U_1.data().context, self.U_2.data().context))
        # print("=========================")

        # print(x)
        # compute temporal attention scores
        # shape is (N, T, V)
        # context.current_context()
        # print("temporal context", context.current_context())

        cur_context = x.context
        lhs = nd.dot(nd.dot(x.transpose((0, 3, 2, 1)), self.U_1.data(cur_context)),
                     self.U_2.data(cur_context))

        # shape is (N, V, T)
        rhs = nd.dot(self.U_3.data(cur_context), x.transpose((2, 0, 1, 3)))

        product = nd.batch_dot(lhs, rhs)

        E = nd.dot(self.V_e.data(cur_context),
                   nd.sigmoid(product + self.b_e.data(cur_context))
                     .transpose((1, 2, 0))).transpose((2, 0, 1))

        # normalization
        E = E - nd.max(E, axis=1, keepdims=True)
        exp = nd.exp(E)
        E_normalized = exp / nd.sum(exp, axis=1, keepdims=True)
        return E_normalized
Example #22
    def forward(self, cur_input, state, encoder_outputs):
        # When the RNN has multiple layers, take the hidden state of the single
        # layer closest to the output layer.
        # state.shape is [(1, batch_size, decoder_hidden_dim)]
        single_layer_state = [state[0][-1].expand_dims(0)]
        # encoder_outputs.shape is (max_seq_len, batch_size * encoder_hidden_dim)
        encoder_outputs = encoder_outputs.reshape(
            (self.max_seq_len, -1, self.encoder_hidden_dim))
        # single_layer_state shape: [(1, batch_size, decoder_hidden_dim)]
        # hidden_broadcast shape: (max_seq_len, batch_size, decoder_hidden_dim)
        hidden_broadcast = nd.broadcast_axis(single_layer_state[0],
                                             axis=0,
                                             size=self.max_seq_len)

        # encoder_outputs_and_hiddens shape:
        # (max_seq_len, batch_size, encoder_hidden_dim + decoder_hidden_dim)
        encoder_outputs_and_hiddens = nd.concat(encoder_outputs,
                                                hidden_broadcast,
                                                dim=2)

        # energy shape: (max_seq_len, batch_size, 1)
        energy = self.attention(encoder_outputs_and_hiddens)

        # batch_attention shape: (batch_size, 1, max_seq_len)
        batch_attention = nd.softmax(energy, axis=0).transpose((1, 2, 0))

        # batch_encoder_outputs shape: (batch_size, max_seq_len, encoder_hidden_dim)
        batch_encoder_outputs = encoder_outputs.swapaxes(0, 1)

        # decoder_context shape: (batch_size, 1, encoder_hidden_dim)
        decoder_context = nd.batch_dot(batch_attention, batch_encoder_outputs)

        # cur_input shape: (batch_size,)
        # input_and_context shape: (batch_size, 1, decoder_hidden_dim + encoder_hidden_dim)
        input_and_context = nd.concat(nd.expand_dims(self.embedding(cur_input),
                                                     axis=1),
                                      decoder_context,
                                      dim=2)
        # concat_input shape: (1, batch_size, decoder_hidden_dim)
        concat_input = self.rnn_concat_input(input_and_context).reshape(
            (1, -1, 0))
        concat_input = self.dropout(concat_input)

        # When the RNN has multiple layers, initialize each layer's hidden state
        # with the single-layer hidden state.
        state = [
            nd.broadcast_axis(single_layer_state[0],
                              axis=0,
                              size=self.num_layers)
        ]

        # XXX note: state is [nd.NDArray]
        output, state = self.rnn(concat_input, state)
        output = self.dropout(output)
        output = self.out(output)
        output = nd.reshape(output, (-3, -1))
        # output shape: (batch_size * 1, output_dim)
        return output, state
Example #23
 def predict(self, x):
     h = self.e(x[:, 0])
     r = self.r(x[:, 1])
     t = self.e(x[:, 2])
     t = t.reshape(-1, self.dim, 1)
     r = r.reshape(-1, self.dim, self.dim)
     tr = nd.batch_dot(r, t)
     tr = tr.reshape(-1, self.dim)
     score = nd.sum(h * tr, -1)
     return -score
Example #24
 def forward(self, input_data):
     x = nd.transpose(input_data, axes=(1, 0, 2))
     h = nd.transpose(self.gru(x), axes=(1, 0, 2))  # (m,60,100)
     h = nd.tanh(h)
     g = self.att(h)  # (m,60,1)
     g = nd.softmax(g, axis=1)
     gt = nd.transpose(g, axes=(0, 2, 1))  # (m,1,60)
     n = nd.batch_dot(gt, h)
     y = self.att_out(n)
     return self.output(y)
Example #25
 def forward(self, x_left, x_right):
     x_left = self.embed_left(x_left)
     x_right = self.embed_right(x_right)
     embed_cross = nd.expand_dims(
         nd.batch_dot(x_left, x_right, transpose_b=True), 3)
     embed_cross = nd.transpose(embed_cross, (0, 3, 1, 2))
     embed_cross = self._conv_block(embed_cross)
     embed_pool = self.pool(embed_cross)
     out = self.output_layer(embed_pool)
     return out
Example #26
 def forward(self, emb_a, emb_b):
     # emb_a: batch_size*seq_len_a*emb_size, emb_b: batch_size*seq_len_b*emb_size
     # self.W: emb_size*emb_size
     # After the evaluation, the shape is batch_size*seq_len_a*emb_size_b
     dot_product = nd.batch_dot(nd.dot(emb_a, self.W.data()), \
                                nd.transpose(emb_b, axes=(0, 2, 1)))
     # this softmax is subject to severe numerical instability,
     # so add a workaround
     G_ab = nd.softmax(dot_product -
                       nd.max(dot_product, axis=1, keepdims=True),
                       axis=1)
     return G_ab
Example #27
def compute_curvature(nn_pts):
    nn_pts_mean = nd.mean(nn_pts, axis=2, keepdims=True)  # (N, P, 1, 3)
    nn_pts_demean = nn_pts - nn_pts_mean  # (N, P, K, 3)
    nn_pts_NPK31 = nd.expand_dims(nn_pts_demean, axis=-1)
    covariance_matrix = nd.batch_dot(nn_pts_NPK31,
                                     nn_pts_NPK31,
                                     transpose_b=True)  # (N, P, K, 3, 3)
    covariance_matrix_mean = nd.mean(covariance_matrix, axis=2,
                                     keepdims=False)  # (N, P, 3, 3)
    eigvals = compute_eigenvals(covariance_matrix_mean)  # (N, P, 3)
    curvature = nd.min(eigvals, axis=-1) / (nd.sum(eigvals, axis=-1) + 1e-8)
    return curvature
Example #28
 def bdd_message_func(self, edges):
     """Message function for block-diagonal-decomposition regularizer"""
     ctx = edges.src['h'].context
     if edges.src['h'].dtype in (np.int32, np.int64) and len(edges.src['h'].shape) == 1:
         raise TypeError('Block decomposition does not allow integer ID feature.')
     weight = self.weight.data(ctx)[edges.data['type'], :].reshape(
         -1, self.submat_in, self.submat_out)
     node = edges.src['h'].reshape(-1, 1, self.submat_in)
     msg = nd.batch_dot(node, weight).reshape(-1, self.out_feat)
     if 'norm' in edges.data:
         msg = msg * edges.data['norm']
     return {'msg': msg}
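
The block-diagonal trick above is just a batched matrix product; a shape sketch with assumed sizes, where each block gets its own small weight matrix:

from mxnet import nd

num_blocks, submat_in, submat_out = 10, 4, 8
node = nd.random.uniform(shape=(num_blocks, 1, submat_in))
weight = nd.random.uniform(shape=(num_blocks, submat_in, submat_out))
msg = nd.batch_dot(node, weight).reshape(-1, submat_out)
print(msg.shape)  # (10, 8)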
Example #29
def make_dynamic_dec(T, values_L):

    values_T = nd.array(np.linspace(1, T, num=T), ctx=values_L.context)

    values_T = nd.expand_dims(nd.expand_dims(values_T, axis=0), axis=2)

    values_T = nd.broadcast_axis(values_T, axis=0, size=values_L.shape[0])

    values_TL = nd.batch_dot(values_T, values_L, transpose_b=True)

    values_sin = nd.sin(values_TL)
    values_cos = nd.cos(values_TL)

    return nd.concat(values_sin, values_cos, dim=2)
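
A hypothetical call to make_dynamic_dec() above (numpy and nd are assumed to be imported in the example's module); with T=8 and a (2, 4, 1) frequency tensor, the sin/cos features come out as (2, 8, 8):

from mxnet import nd

values_L = nd.random.uniform(shape=(2, 4, 1))
feats = make_dynamic_dec(8, values_L)
print(feats.shape)  # (2, 8, 8)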
Example #30
    def forward(self, feature, data):
        """ Forward process of a HyperDense layer

        Args:
            feature: a NDArray with shape [n, d]
            data: a NDArray with shape [n, b, pre_d]

        Returns:
            output: a NDArray with shape [n, b, d]
        """
        weight = self.w_mlp(feature) # [n, pre_hidden_size * hidden_size]
        weight = nd.reshape(weight, (-1, self.pre_hidden_size, self.hidden_size))
        bias = nd.reshape(self.b_mlp(feature), shape=(-1, 1, 1)) # [n, 1, 1]
        return nd.batch_dot(data, weight) + bias
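
The HyperDense product above applies a different weight matrix to each node's mini-batch; a shape sketch with assumed sizes:

from mxnet import nd

n, b, pre_d, d = 5, 3, 4, 6
data = nd.random.uniform(shape=(n, b, pre_d))
weight = nd.random.uniform(shape=(n, pre_d, d))
print(nd.batch_dot(data, weight).shape)  # (n, b, d) = (5, 3, 6)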
Example #31
    def forward(self, x, time, context):
        hid = []
        hid.append(x)
        # m_i = sum A_ij * x_ij + T_A_i
        Ain_c = self.A(context)
        Ain_t = self.T_A(time)
        Ain = Ain_c + Ain_t

        # c_i = sum B_ij * u + T_B_i
        Bin_c = self.B(context)
        Bin_t = self.T_B(time)
        Bin = Bin_c + Bin_t
        
        for h in range(self.nhop):
            hid3dim = hid[-1].expand_dims(1)
            Aout = nd.batch_dot(hid3dim, Ain.swapaxes(1,2))
            Aout2dim = Aout.reshape((-1, self.mem_size))
            P = nd.softmax(Aout2dim, axis=1)
            
            Prob3dim = P.expand_dims(1)
            Bout = nd.batch_dot(Prob3dim, Bin)
            Bout2dim = Bout.reshape((-1, self.edim))
            
            Cout = self.C(hid[-1])
            Dout = Bout2dim + Cout
            
            if self.lindim == self.edim:
                hid.append(Dout)
            elif self.lindim == 0:
                hid.append(nd.relu(Dout))
            else:
                F = Dout[:, :self.lindim]
                G = Dout[:, self.lindim:]
                K = nd.relu(G)
                hid.append(nd.concat(F, K, dim=1))
        z = self.W(hid[-1])
        return z
Example #32
 def forward(self, query, key, value, valid_length=None):
     """Forward function"""
     d = query.shape[-1]
     scores = nd.batch_dot(query, key, transpose_b=True) / math.sqrt(d)
     attention_weights = self.dropout(masked_softmax(scores, valid_length))
     return nd.batch_dot(attention_weights, value)