def y_prob(layer, emit, d, batch):
    """
    :param emit: 1D: n_words, 2D: batch, 3D: n_y
    :param d: 1D: n_words, 2D: batch; label id
    :return: log probability of the label sequence d: 1D: batch
    """
    def forward_step(e_t, d_t, d_prev, d_score_prev, z_scores_prev, trans):
        """
        :param e_t: 1D: batch, 2D: n_y
        :param d_t: 1D: batch
        :param d_prev: 1D: batch
        :param d_score_prev: 1D: batch
        :param z_scores_prev: 1D: batch, 2D: n_y
        :param trans: 1D: n_y, 2D: n_y
        """
        # score of the gold path up to step t
        d_score_t = d_score_prev + trans[d_t, d_prev] + e_t[T.arange(batch), d_t]  # 1D: batch
        # scores of all paths up to step t
        z_sum = z_scores_prev.dimshuffle(0, 'x', 1) + trans  # 1D: batch, 2D: n_y, 3D: n_y
        z_scores_t = logsumexp(z_sum, axis=2).reshape(e_t.shape) + e_t  # 1D: batch, 2D: n_y
        return d_t, d_score_t, z_scores_t

    d_score0 = layer.BOS[d[0]] + emit[0][T.arange(batch), d[0]]  # 1D: batch
    z_scores0 = layer.BOS + emit[0]  # 1D: batch, 2D: n_y

    [_, d_scores, z_scores], _ = theano.scan(fn=forward_step,
                                             sequences=[emit[1:], d[1:]],
                                             outputs_info=[d[0], d_score0, z_scores0],
                                             non_sequences=layer.W_t)
    d_score = d_scores[-1]  # 1D: batch; score of the gold path
    z_score = logsumexp(z_scores[-1], axis=1).flatten()  # 1D: batch; log partition function
    return d_score - z_score
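# Every snippet in this section calls a `logsumexp` helper that is never
# shown. A minimal numerically stable sketch (an assumption, not the
# original implementation): `keepdims` defaults to True, which is
# consistent with the call sites above reshaping/flattening the result.
import theano
import theano.tensor as T

def logsumexp(x, axis=None, keepdims=True):
    # Stable log(sum(exp(x))): subtract the per-axis max before
    # exponentiating, then add it back outside the log.
    x_max = T.max(x, axis=axis, keepdims=True)
    z = T.log(T.sum(T.exp(x - x_max), axis=axis, keepdims=True)) + x_max
    return z if keepdims else T.squeeze(z)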
def get_log_probabilities(self, h, y):
    """Calculate the log probabilities of y (predicted/gold tags).

    :param h: outputs from the previous layer
        1D: sent_len, 2D: batch_size, 3D: output_dim
    :param y: predicted tags
        1D: sent_len, 2D: batch_size
    :return: log probabilities of y
        1D: batch_size
    """
    batch_size = T.cast(y.shape[1], dtype="int32")

    # log likelihood of the first tags
    # 1D: batch_size
    y_score_0 = (self.BOS_probability[y[0]]
                 + h[0][T.arange(batch_size), y[0]])

    # scores of all possible first tags
    # 1D: batch_size, 2D: output_dim
    z_score_0 = self.BOS_probability + h[0]

    [_, y_score, z_score], _ = theano.scan(
        fn=self.forward_probability,
        sequences=[h[1:], y[1:]],
        outputs_info=[y[0], y_score_0, z_score_0],
    )
    return y_score[-1] - logsumexp(z_score[-1], axis=1).flatten()
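# Note: y_prob and get_log_probabilities compute the same quantity, the
# linear-chain CRF log-likelihood
#     log p(y | x) = score(x, y) - log Z(x),
# where, with emission scores h_t and transition scores trans[y_t, y_{t-1}],
#     score(x, y) = BOS[y_0] + h_0[y_0] + sum_{t>=1} (trans[y_t, y_{t-1}] + h_t[y_t]),
# and log Z(x) log-sum-exps the same path score over all possible tag
# sequences, computed in O(n_words * n_y^2) by the forward recursion.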
def get_path_score_z(h, W_trans):
    """
    :param h: 1D: n_words, 2D: batch, 3D: n_labels; label score
    :param W_trans: 1D: n_labels, 2D: n_labels; transition score between two labels
    :return: 1D: batch; log partition function
    """
    alpha_init = h[0]
    alpha, _ = theano.scan(fn=forward_alpha,
                           sequences=[h[1:]],
                           outputs_info=alpha_init,
                           non_sequences=W_trans)
    return logsumexp(alpha[-1], axis=1).ravel()
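# `forward_alpha` is referenced but not defined here. A plausible sketch
# (hypothetical body, inferred from the shapes and the W_trans convention
# in get_state_score_z below, not the original implementation):
def forward_alpha(h_t, alpha_tm1, W_trans):
    """
    :param h_t: 1D: batch, 2D: n_labels (j); label score
    :param alpha_tm1: 1D: batch, 2D: n_labels (i); forward score
    :param W_trans: 1D: n_labels (i), 2D: n_labels (j); transition score from i to j
    :return: 1D: batch, 2D: n_labels (j)
    """
    # (batch, i, 1) broadcast against (i, j) -> (batch, i, j),
    # then log-sum-exp out the previous label i.
    scores = alpha_tm1.dimshuffle(0, 1, 'x') + W_trans
    return logsumexp(scores, axis=1).reshape(h_t.shape) + h_t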
def get_state_score_z(h, y, W_trans):
    """
    :param h: 1D: n_words, 2D: batch, 3D: n_labels (j); label score
    :param y: 1D: n_words, 2D: batch; label id
    :param W_trans: 1D: n_labels (i), 2D: n_labels (j); transition score from i to j
    :return: 1D: n_words, 2D: batch; specified label score
    """
    # 1D: n_words-1, 2D: batch, 3D: n_labels (j); transition score
    trans_scores = get_transition_scores(y, W_trans)

    # 1D: 1, 2D: batch, 3D: n_labels (j); zero scores for the first word
    zero = T.zeros(shape=(1, h.shape[1], h.shape[2]), dtype=theano.config.floatX)

    # 1D: n_words, 2D: batch, 3D: n_labels (j); transition score, zero-padded
    trans_scores = T.concatenate([zero, trans_scores], axis=0)

    return logsumexp(h + trans_scores, axis=2)
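# `get_transition_scores` is also not shown. Given the shape comment
# above, a sketch (hypothetical, not the original implementation):
def get_transition_scores(y, W_trans):
    """
    :param y: 1D: n_words, 2D: batch; label id
    :param W_trans: 1D: n_labels (i), 2D: n_labels (j); transition score from i to j
    :return: 1D: n_words-1, 2D: batch, 3D: n_labels (j)
    """
    # W_trans[y[:-1]][t, b, j] = score of moving from the gold label at
    # word t to each candidate label j at word t+1.
    return W_trans[y[:-1]]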
def forward_probability(self, h_t, y_t, y_tm1, y_score_tm1, z_score_tm1):
    """Calculate one CRF forward step.

    :param h_t: emission scores
        1D: batch_size, 2D: output_dim
    :param y_t: current tags
        1D: batch_size
    :param y_tm1: previous tags
        1D: batch_size
    :param y_score_tm1: log likelihood of the tag sequence so far
        1D: batch_size
    :param z_score_tm1: scores of all tag sequences so far
        1D: batch_size, 2D: output_dim
    :return y_t: current tags
        1D: batch_size
    :return y_score_t: log likelihood of the tag sequence up to t
        1D: batch_size
    :return z_score_t: scores of all tag sequences up to t
        1D: batch_size, 2D: output_dim
    """
    batch_size = T.cast(h_t.shape[0], dtype="int32")
    y_score_t = (
        y_score_tm1                          # forward
        + self.W_transition[y_t, y_tm1]      # transition
        + h_t[T.arange(batch_size), y_t])    # emission
    z_score_t = (
        logsumexp(
            z_score_tm1.dimshuffle(0, 'x', 1)  # forward
            + self.W_transition,               # transition
            axis=2).reshape(h_t.shape)
        + h_t)                                 # emission
    return y_t, y_score_t, z_score_t
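# A quick way to convince yourself the forward recursion in
# forward_probability is correct: for a tiny problem, the forward
# partition function must equal brute-force enumeration over all tag
# sequences. Standalone NumPy sketch (single sentence, no batching;
# `trans` indexed [current, previous] as in the code above):
import itertools
import numpy as np

def check_forward(h, trans, bos):
    n_words, n_y = h.shape
    # Brute force: score every tag sequence, then log-sum-exp.
    scores = []
    for path in itertools.product(range(n_y), repeat=n_words):
        s = bos[path[0]] + h[0, path[0]]
        for t in range(1, n_words):
            s += trans[path[t], path[t - 1]] + h[t, path[t]]
        scores.append(s)
    brute = np.logaddexp.reduce(scores)
    # Forward algorithm: the same quantity in O(n_words * n_y^2).
    z = bos + h[0]
    for t in range(1, n_words):
        z = np.logaddexp.reduce(z[None, :] + trans, axis=1) + h[t]
    return brute, np.logaddexp.reduce(z)

# rng = np.random.default_rng(0)
# check_forward(rng.normal(size=(4, 3)), rng.normal(size=(3, 3)),
#               rng.normal(size=3))  # -> two (nearly) equal numbers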
def hiddens_to_output_probs(hiddens):
    output = concat_and_multiply(params['predict'], hiddens)
    return output - logsumexp(output, axis=1, keepdims=True)  # Normalize log-probs.
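# `concat_and_multiply` and `params` are defined elsewhere. A plausible
# NumPy sketch of the helper (an assumption based on the call signature
# here, not necessarily the original):
def concat_and_multiply(weights, *args):
    # Concatenate the inputs with a constant-1 bias column, then apply
    # one affine transformation as a single matrix product.
    cat_state = np.hstack(args + (np.ones((args[0].shape[0], 1)),))
    return np.dot(cat_state, weights)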