Example #1
def y_prob(layer, emit, d, batch):
    """
    :param layer: CRF layer providing the BOS scores and the transition matrix W_t
    :param emit: 1D: n_words, 2D: Batch, 3D: n_y; emission scores
    :param d: 1D: n_words, 2D: Batch; gold tag ids
    :param batch: batch size
    :return: log probability of the tag sequence d: 1D: Batch
    """
    def forward_step(e_t, d_t, d_prev, d_score_prev, z_scores_prev, trans):
        """
        :param e_t: 1D: Batch, 2D: n_y
        :param d_t: 1D: Batch
        :param d_prev: 1D: Batch
        :param d_score_prev: 1D: Batch
        :param z_scores_prev: 1D: Batch, 2D: n_y
        :param trans: 1D: n_y, 2D: n_y
        """
        d_score_t = d_score_prev + trans[d_t, d_prev] + e_t[T.arange(batch), d_t]  # 1D: Batch
        z_sum = z_scores_prev.dimshuffle(0, 'x', 1) + trans  # 1D: Batch, 2D: n_y, 3D: n_y
        z_scores_t = logsumexp(z_sum, axis=2).reshape(e_t.shape) + e_t  # 1D: Batch, 2D: n_y
        return d_t, d_score_t, z_scores_t

    d_score0 = layer.BOS[d[0]] + emit[0][T.arange(batch), d[0]]  # 1D: Batch
    z_scores0 = layer.BOS + emit[0]  # 1D: Batch, 2D: n_y

    [_, d_scores, z_scores], _ = theano.scan(fn=forward_step,
                                             sequences=[emit[1:], d[1:]],
                                             outputs_info=[d[0], d_score0, z_scores0],
                                             non_sequences=layer.W_t)

    d_score = d_scores[-1]
    z_score = logsumexp(z_scores[-1], axis=1).flatten()

    return d_score - z_score
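
The Theano snippets on this page all call a `logsumexp` helper that the excerpts themselves do not define. A minimal numerically stable version consistent with the `.reshape(...)` and `.flatten()` calls above (an assumed implementation, not the original repository's code):

import theano.tensor as T

def logsumexp(x, axis=None):
    # Subtract the max before exponentiating for numerical stability.
    # keepdims=True matches the reshape/flatten usage in the snippets.
    x_max = T.max(x, axis=axis, keepdims=True)
    return T.log(T.sum(T.exp(x - x_max), axis=axis, keepdims=True)) + x_max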
Example #2
    def get_log_probabilities(self, h, y):
        """Calculate log probabilities of y (predicted/gold tags).

        :param
            h: outputs from the previous layer
                1D: sent_len
                2D: batch_size
                3D: output_dim
            y: predicted/gold tags
                1D: sent_len
                2D: batch_size
        :return: log probabilities of y
            1D: batch_size
        """
        batch_size = T.cast(y.shape[1], dtype="int32")
        # log likelihood of 1st tags
        # 1D: batch_size
        y_score_0 = (self.BOS_probability[y[0]] +
                     h[0][T.arange(batch_size), y[0]])
        # forward log scores over all possible first tags
        # 1D: batch_size, 2D: output_dim
        z_score_0 = self.BOS_probability + h[0]
        [_, y_score, z_score], _ = theano.scan(
            fn=self.forward_probability,
            sequences=[h[1:], y[1:]],
            outputs_info=[y[0], y_score_0, z_score_0],
        )

        return y_score[-1] - logsumexp(z_score[-1], axis=1).flatten()
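
For training, the returned log probabilities are typically negated and averaged into a loss (a hypothetical usage sketch; `crf_layer` stands in for an instance of this class):

nll_loss = -T.mean(crf_layer.get_log_probabilities(h, y))  # scalar negative log-likelihood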
Example #3
def get_path_score_z(h, W_trans):
    """
    :param h: 1D: n_words, 2D: batch, 3D: n_labels; label score
    :param W_trans: 1D: n_labels, 2D: n_labels; transition score between two labels
    :return: 1D: batch
    """
    alpha_init = h[0]
    alpha, _ = theano.scan(fn=forward_alpha,
                           sequences=[h[1:]],
                           outputs_info=alpha_init,
                           non_sequences=W_trans)
    return logsumexp(alpha[-1], axis=1).ravel()
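
The step function `forward_alpha` is not shown in this excerpt. A version consistent with the shapes documented above and with the W_trans[i, j] = "score of moving from label i to label j" convention of Example #4 (an assumption about the original code):

def forward_alpha(h_t, alpha_tm1, W_trans):
    """
    :param h_t: 1D: batch, 2D: n_labels (j); emission scores at step t
    :param alpha_tm1: 1D: batch, 2D: n_labels (i); forward scores at step t-1
    :param W_trans: 1D: n_labels (i), 2D: n_labels (j); transition scores
    :return: 1D: batch, 2D: n_labels (j); forward scores at step t
    """
    # 1D: batch, 2D: n_labels (i), 3D: n_labels (j)
    scores = alpha_tm1.dimshuffle(0, 1, 'x') + W_trans + h_t.dimshuffle(0, 'x', 1)
    # sum out the previous label i
    return logsumexp(scores, axis=1).reshape(h_t.shape)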
Example #4
def get_state_score_z(h, y, W_trans):
    """
    :param h: 1D: n_words, 2D: batch, 3D: n_labels (j); label score
    :param y: 1D: n_words, 2D: batch; label id
    :param W_trans: 1D: n_labels (i), 2D: n_labels (j); transition score from i to j
    :return: 1D: n_words, 2D: batch; specified label score
    """
    # 1D: n_words-1, 2D: batch, 3D: n_labels (j); label score
    trans_scores = get_transition_scores(y, W_trans)
    # 1D: 1, 2D: batch, 3D: n_labels (j); 0
    zero = T.zeros(shape=(1, h.shape[1], h.shape[2]),
                   dtype=theano.config.floatX)
    # 1D: n_words, 2D: batch, 3D: n_labels (j); label score
    trans_scores = T.concatenate([zero, trans_scores], axis=0)
    return logsumexp(h + trans_scores, axis=2)
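
`get_transition_scores` is also not shown. Given the documented shapes, it plausibly selects, for each position, the row of W_trans indexed by the previous gold label (an assumed implementation):

def get_transition_scores(y, W_trans):
    """
    :param y: 1D: n_words, 2D: batch; label ids
    :param W_trans: 1D: n_labels (i), 2D: n_labels (j); transition score from i to j
    :return: 1D: n_words-1, 2D: batch, 3D: n_labels (j)
    """
    # W_trans[y[:-1]] broadcasts the integer index array (n_words-1, batch)
    # over the first axis of W_trans, yielding the scores from each
    # previous gold label to every candidate label j.
    return W_trans[y[:-1]]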
Example #5
    def forward_probability(self, h_t, y_t, y_tm1, y_score_tm1, z_score_tm1):
        """Compute one step of the CRF forward recursion.

        :param
            h_t: emission scores
                1D: batch_size
                2D: output_dim
            y_t: current tag
                1D: batch_size
            y_tm1: previous tag
                1D: batch_size
            y_score_tm1: log likelihood of the tag sequence up to t-1
                1D: batch_size
            z_score_tm1: forward log scores over all tag sequences
                ending in each tag at t-1
                1D: batch_size
                2D: output_dim
        :return
            y_t: current tag
                1D: batch_size
            y_score_t: log likelihood of the tag sequence up to t
                1D: batch_size
            z_score_t: forward log scores at t
                1D: batch_size
                2D: output_dim
        """
        batch_size = T.cast(h_t.shape[0], dtype="int32")
        y_score_t = (
            y_score_tm1  # forward
            + self.W_transition[y_t, y_tm1]  # transition
            + h_t[T.arange(batch_size), y_t])  # emission
        z_score_t = (
            logsumexp(
                z_score_tm1.dimshuffle(0, 'x', 1)  # forward
                + self.W_transition,  # transition
                axis=2).reshape(h_t.shape) + h_t)  # emission

        return y_t, y_score_t, z_score_t
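
In index notation, this step implements the standard CRF forward recursion:

    z_score_t[b, j] = logsumexp_i( z_score_tm1[b, i] + W_transition[j, i] ) + h_t[b, j]

while y_score_t accumulates only the gold transition and emission scores, so the difference taken in get_log_probabilities is the sequence log likelihood.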
Example #6
def hiddens_to_output_probs(hiddens):
    output = concat_and_multiply(params['predict'], hiddens)
    return output - logsumexp(output, axis=1, keepdims=True)  # Normalize log-probs.
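
Unlike the earlier snippets, this one uses a SciPy-style `logsumexp` with `keepdims` support (e.g. the one shipped with autograd's scipy wrappers). `concat_and_multiply` is not defined in the excerpt; in the autograd RNN example this snippet appears to come from, it appends a constant bias column and applies a weight matrix (a sketch based on that assumption):

import autograd.numpy as np

def concat_and_multiply(weights, *args):
    # Stack the inputs side by side with a column of ones so that the
    # last row of `weights` acts as a bias term.
    cat_state = np.hstack(args + (np.ones((args[0].shape[0], 1)),))
    return np.dot(cat_state, weights)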