Example #1
import tensorflow as tf

def crf_loss(y, y_, transitions, nums_tags, batch_size):
    tag_scores = y
    nums_steps = len(tf.unstack(tag_scores, axis=1))
    # 0 is the padding id, so sign() gives a 0/1 mask over real tokens.
    masks = tf.cast(tf.sign(y_), dtype=tf.float32)
    lengths = tf.reduce_sum(tf.sign(y_), axis=1)
    tag_ids = y_
    # Prepend the artificial begin tag (id == nums_tags) to every sentence.
    b_id = tf.stack([[nums_tags]] * batch_size)
    #e_id = tf.stack([[0]] * batch_size)
    padded_tag_ids = tf.concat(axis=1, values=[b_id, tag_ids])
    # Slice out consecutive (previous tag, current tag) pairs; each pair
    # indexes one entry of the transition matrix.
    idx_tag_ids = tf.stack(
        [tf.slice(padded_tag_ids, [0, i], [-1, 2]) for i in range(nums_steps)],
        axis=1)
    tag_ids = tf.contrib.layers.one_hot_encoding(tag_ids, nums_tags)
    # Emission score of the gold path, masked to the true lengths.
    point_score = tf.reduce_sum(tag_scores * tag_ids, axis=2)
    point_score *= masks
    # Equivalent to trans_score = tf.gather_nd(transitions, idx_tag_ids),
    # written as a flat gather for TF versions without gather_nd gradients.
    trans_sh = tf.stack(transitions.get_shape().as_list())
    trans_sh = tf.cumprod(trans_sh, exclusive=True, reverse=True)
    flat_tag_ids = tf.reduce_sum(trans_sh * idx_tag_ids, axis=2)
    trans_score = tf.gather(tf.reshape(transitions, [-1]), flat_tag_ids)
    #extend_mask = tf.concat(axis=1, values=[tf.ones([batch_size, 1]), masks])
    extend_mask = masks
    trans_score *= extend_mask
    target_path_score = tf.reduce_sum(point_score) + tf.reduce_sum(trans_score)
    total_path_score, _, _ = Forward(tag_scores, transitions, nums_tags,
                                     lengths, batch_size)()
    # Negative log-likelihood: gold-path score minus the log partition.
    return -(target_path_score - total_path_score)
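The cumprod/gather block above is a hand-rolled stand-in for tf.gather_nd, which lacked a registered gradient in early TensorFlow releases: multiplying each (previous tag, current tag) pair by the row-major strides of the transition matrix yields flat offsets for a plain 1-D gather. A minimal numpy sketch of the same stride arithmetic (all names and shapes here are illustrative, not from the original code):

import numpy as np

transitions = np.arange(16.0).reshape(4, 4)         # (nums_tags + 1, nums_tags + 1)
idx_tag_ids = np.array([[[3, 0], [0, 1], [1, 2]]])  # (batch, steps, 2) tag pairs

# Exclusive, reversed cumulative product of the shape = row-major strides,
# matching tf.cumprod(shape, exclusive=True, reverse=True) above.
shape = np.array(transitions.shape)
strides = np.append(np.cumprod(shape[::-1])[::-1][1:], 1)  # -> [4, 1]
flat_ids = (idx_tag_ids * strides).sum(axis=2)             # row * 4 + col
trans_score = transitions.reshape(-1)[flat_ids]

# Identical to indexing the matrix with the pairs directly (tf.gather_nd).
assert (trans_score == transitions[idx_tag_ids[..., 0],
                                   idx_tag_ids[..., 1]]).all()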
Example #2
def decode_graph(self):
    self.decode_holders = []
    self.scores = []
    for bucket in self.buckets_char:
        decode_holders = []
        scores = []
        for nt in self.nums_tags:
            ob = tf.placeholder(tf.float32, [None, bucket, nt])
            trans = tf.placeholder(tf.float32, [nt + 1, nt + 1])
            nums_steps = ob.get_shape().as_list()[1]
            length = tf.placeholder(tf.int32, [None])
            b_size = tf.placeholder(tf.int32, [])
            small = -1000
            # Pad one extra class with a large negative score so the
            # artificial tag can never win inside the sentence.
            class_pad = small * tf.ones([b_size, nums_steps, 1])
            observations = tf.concat(axis=2, values=[ob, class_pad])
            # Begin/end observation vectors: the path is forced to start
            # in the artificial tag (index nt) and to end in tag 0.
            b_vec = tf.tile(([small] * nt + [0]), [b_size])
            b_vec = tf.cast(b_vec, tf.float32)
            b_vec = tf.reshape(b_vec, [b_size, 1, -1])
            e_vec = tf.tile(([0] + [small] * nt), [b_size])
            e_vec = tf.cast(e_vec, tf.float32)
            e_vec = tf.reshape(e_vec, [b_size, 1, -1])
            observations = tf.concat(axis=1,
                                     values=[b_vec, observations, e_vec])
            transitions = tf.reshape(tf.tile(trans, [b_size, 1]),
                                     [b_size, nt + 1, nt + 1])
            observations = tf.reshape(observations,
                                      [-1, nums_steps + 2, nt + 1, 1])
            observations = tf.transpose(observations, [1, 0, 2, 3])
            previous = observations[0, :, :, :]
            max_scores = []
            max_scores_pre = []
            alphas = [previous]
            # Viterbi-style forward pass: track the best score per tag and
            # the backpointer to the previous tag at every step.
            for t in range(1, nums_steps + 2):
                previous = tf.reshape(previous, [-1, nt + 1, 1])
                current = tf.reshape(observations[t, :, :, :],
                                     [-1, 1, nt + 1])
                alpha_t = previous + current + transitions
                max_scores.append(tf.reduce_max(alpha_t, axis=1))
                max_scores_pre.append(tf.argmax(alpha_t, axis=1))
                alpha_t = tf.reshape(Forward.log_sum_exp(alpha_t, axis=1),
                                     [-1, nt + 1, 1])
                alphas.append(alpha_t)
                previous = alpha_t
            max_scores = tf.stack(max_scores, axis=1)
            max_scores_pre = tf.stack(max_scores_pre, axis=1)
            decode_holders.append([ob, trans, length, b_size])
            scores.append((max_scores, max_scores_pre))
        self.decode_holders.append(decode_holders)
        self.scores.append(scores)
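Note that decode_graph only assembles the graph and stores (max_scores, max_scores_pre) per bucket; turning the fetched arrays into a tag sequence still requires a Viterbi backtrace outside the graph. A hypothetical sketch for a single sentence, assuming step 0 of the fetched arrays lines up with the first real token (the exact offsets depend on how padding and the begin/end steps are fed, so treat this as a template only):

import numpy as np

def viterbi_backtrace(max_scores, max_scores_pre, length):
    # Hypothetical post-processing helper, not part of the class above.
    #   max_scores:     (nums_steps + 1, nt + 1) best path score per tag
    #   max_scores_pre: (nums_steps + 1, nt + 1) argmax over previous tags
    #   length:         true (unpadded) sentence length
    best_tag = int(np.argmax(max_scores[length - 1]))
    tags = [best_tag]
    # Follow backpointers from the last real step back to the first.
    for t in range(length - 1, 0, -1):
        best_tag = int(max_scores_pre[t, best_tag])
        tags.append(best_tag)
    tags.reverse()
    return tags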
Example #3
def crf_loss(y, y_, ly, ly_, transitions, nums_tags, batch_size):
    """
    Compute the CRF loss.
    :param y: predictions, shape = (batch_size, sentence length, number of tags),
        i.e. the tag scores for every character in each sentence
    :param y_: ground truth, shape = (batch_size, sentence length)
    :param ly: predictions for the auxiliary language-model loss (see lm_loss below)
    :param ly_: targets for the auxiliary language-model loss
    :param transitions: tag transition matrix, shape = (number of tags + 1, number of tags + 1)
    :param nums_tags: number of tags
    :param batch_size: real batch size
    :return: (tagging_loss, lm_loss)
    """
    tag_scores = y
    # Sentence length, i.e. the number of decoding steps
    nums_steps = len(tf.unstack(tag_scores, axis=1))
    # shape = (batch_size, sentence length)
    masks = tf.cast(tf.sign(y_), dtype=tf.float32)
    lengths = tf.reduce_sum(tf.sign(y_), axis=1)
    tag_ids = y_
    # shape = (batch_size, 1); effectively turns a list of arrays/tensors into one tensor
    b_id = tf.stack([[nums_tags]] * batch_size)
    # e_id = tf.stack([[0]] * batch_size)
    # shape = (batch_size, sentence length + 1), since tag_ids.shape = (batch_size,
    # sentence length) and b_id.shape = (batch_size, 1)
    padded_tag_ids = tf.concat(axis=1, values=[b_id, tag_ids])
    # tf.slice() cuts the padded label sequence into consecutive pairs of tags,
    # each pair representing one transition. Every slice is a tensor of
    # shape = (batch_size, 2); there are nums_steps of them, and stacking them
    # gives shape = (batch_size, sentence length, 2)
    idx_tag_ids = tf.stack(
        [tf.slice(padded_tag_ids, [0, i], [-1, 2]) for i in range(nums_steps)],
        axis=1)
    tag_ids = tf.contrib.layers.one_hot_encoding(tag_ids, nums_tags)
    point_score = tf.reduce_sum(tag_scores * tag_ids, axis=2)
    point_score *= masks
    # Save for future
    # trans_score = tf.gather_nd(transitions, idx_tag_ids)
    trans_sh = tf.stack(transitions.get_shape().as_list())
    trans_sh = tf.cumprod(trans_sh, exclusive=True, reverse=True)
    flat_tag_ids = tf.reduce_sum(trans_sh * idx_tag_ids, axis=2)
    trans_score = tf.gather(tf.reshape(transitions, [-1]), flat_tag_ids)
    # extend_mask = tf.concat(axis=1, values=[tf.ones([batch_size, 1]), masks])
    extend_mask = masks
    trans_score *= extend_mask
    target_path_score = tf.reduce_sum(point_score) + tf.reduce_sum(trans_score)
    total_path_score, _, _ = Forward(tag_scores, transitions, nums_tags,
                                     lengths, batch_size)()
    tagging_loss = -(target_path_score - total_path_score)
    lm_loss = tf.reduce_sum(sparse_cross_entropy(ly, ly_) * masks)

    #return tagging_loss, tf.zeros_like(lm_loss)
    return tagging_loss, lm_loss
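For reference, tagging_loss above is the standard CRF negative log-likelihood: target_path_score implements the gold-path score and Forward returns the log partition function, so

\mathcal{L}_{\text{tag}} = -\bigl(s(\mathbf{y}^{*}) - \log Z\bigr), \qquad
s(\mathbf{y}) = \sum_{t} \mathrm{point}_t(y_t) + \sum_{t} \mathrm{trans}(y_{t-1}, y_t), \qquad
Z = \sum_{\mathbf{y}'} \exp s(\mathbf{y}')

The lm_loss term is an independent masked cross-entropy that simply reuses the same padding mask.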
Example #4
def crf_loss(y, y_, transitions, nums_tags, batch_size):
    tag_scores = y
    nums_steps = len(tf.unstack(tag_scores, axis=1))
    masks = tf.cast(tf.sign(y_), dtype=tf.float32)
    lengths = tf.reduce_sum(tf.sign(y_), axis=1)
    tag_ids = y_
    b_id = tf.stack([[nums_tags]] * batch_size)
    #e_id = tf.stack([[0]] * batch_size)
    padded_tag_ids = tf.concat(axis=1, values=[b_id, tag_ids])
    idx_tag_ids = tf.stack(
        [tf.slice(padded_tag_ids, [0, i], [-1, 2]) for i in range(nums_steps)],
        axis=1)
    tag_ids = tf.contrib.layers.one_hot_encoding(tag_ids, nums_tags)
    point_score = tf.reduce_sum(tag_scores * tag_ids, axis=2)
    point_score *= masks
    # Same as Example #1, but gathers transition scores with tf.gather_nd directly.
    trans_score = tf.gather_nd(transitions, idx_tag_ids)
    extend_mask = masks
    trans_score *= extend_mask
    target_path_score = tf.reduce_sum(point_score) + tf.reduce_sum(trans_score)
    total_path_score, _, _ = Forward(tag_scores, transitions, nums_tags,
                                     lengths, batch_size)()
    return -(target_path_score - total_path_score)
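A minimal wiring sketch for this loss, assuming TF 1.x and that Forward and the upstream model producing the tag scores are defined elsewhere (the shapes and hyperparameters below are illustrative only):

import tensorflow as tf

batch_size, max_len, nums_tags = 32, 50, 5

y = tf.placeholder(tf.float32, [batch_size, max_len, nums_tags])  # tag scores
y_ = tf.placeholder(tf.int32, [batch_size, max_len])              # gold ids, 0 = pad
transitions = tf.get_variable('transitions',
                              [nums_tags + 1, nums_tags + 1])

loss = crf_loss(y, y_, transitions, nums_tags, batch_size)
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)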