Example #1
    def build(self, input_shape):
        super(MaximumEntropyMarkovModel, self).build(input_shape)
        output_dim = input_shape[-1]

        if self.hidden_dim is None:
            self._trans = self.add_weight(name='trans',
                                          shape=(output_dim, output_dim),
                                          initializer='glorot_uniform',
                                          trainable=True)
            if self.lr_multiplier != 1:
                K.set_value(self._trans,
                            K.eval(self._trans) / self.lr_multiplier)
        else:
            self._l_trans = self.add_weight(name='l_trans',
                                            shape=(output_dim,
                                                   self.hidden_dim),
                                            initializer='glorot_uniform',
                                            trainable=True)
            self._r_trans = self.add_weight(name='r_trans',
                                            shape=(output_dim,
                                                   self.hidden_dim),
                                            initializer='glorot_uniform',
                                            trainable=True)

            if self.lr_multiplier != 1:
                K.set_value(self._l_trans,
                            K.eval(self._l_trans) / self.lr_multiplier)
                K.set_value(self._r_trans,
                            K.eval(self._r_trans) / self.lr_multiplier)
Example #2
    def build(self, input_shape):
        output_dim = input_shape[-1]
        if not isinstance(output_dim, int):
            output_dim = output_dim.value

        if self.hidden_dim is None:
            self.trans = self.add_weight(name='trans',
                                         shape=(output_dim, output_dim),
                                         initializer='glorot_uniform',
                                         trainable=True)
            if self.lr_multiplier != 1:
                K.set_value(self.trans,
                            K.eval(self.trans) / self.lr_multiplier)
                self.trans = self.lr_multiplier * self.trans
        else:
            self.l_trans = self.add_weight(name='l_trans',
                                           shape=(output_dim, self.hidden_dim),
                                           initializer='glorot_uniform',
                                           trainable=True)
            self.r_trans = self.add_weight(name='r_trans',
                                           shape=(output_dim, self.hidden_dim),
                                           initializer='glorot_uniform',
                                           trainable=True)

            if self.lr_multiplier != 1:
                K.set_value(self.l_trans,
                            K.eval(self.l_trans) / self.lr_multiplier)
                self.l_trans = self.lr_multiplier * self.l_trans
                K.set_value(self.r_trans,
                            K.eval(self.r_trans) / self.lr_multiplier)
                self.r_trans = self.lr_multiplier * self.r_trans
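
Examples #1 and #2 rely on the same learning-rate-multiplier trick: at build time the variable is stored as trans/λ, and the graph is then shown λ·trans (done inline in Example #2, and typically via a property otherwise), which scales the gradient on the stored variable by λ. A minimal self-contained sketch of the idea, assuming plain tf.keras (the layer name and the dot-product use are hypothetical):

from tensorflow.keras import backend as K
from tensorflow.keras.layers import Layer

class ScaledTrans(Layer):
    """Hedged sketch: a weight with an effective per-layer LR multiplier."""

    def __init__(self, lr_multiplier=1, **kwargs):
        super(ScaledTrans, self).__init__(**kwargs)
        self.lr_multiplier = lr_multiplier

    def build(self, input_shape):
        output_dim = input_shape[-1]
        self._trans = self.add_weight(name='trans',
                                      shape=(output_dim, output_dim),
                                      initializer='glorot_uniform',
                                      trainable=True)
        if self.lr_multiplier != 1:
            # store trans / multiplier; the graph sees multiplier * stored,
            # so the gradient on the stored variable is scaled by multiplier
            K.set_value(self._trans,
                        K.eval(self._trans) / self.lr_multiplier)

    @property
    def trans(self):
        if self.lr_multiplier != 1:
            return self.lr_multiplier * self._trans
        return self._trans

    def call(self, inputs):
        return K.dot(inputs, self.trans)
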
Example #3
 def train_function(inputs):  # redefine the training function
     grads = embedding_gradients(inputs)[0]  # embedding gradients
     delta = epsilon * grads / (np.sqrt((grads**2).sum()) + 1e-8)  # compute the perturbation
     K.set_value(embeddings, K.eval(embeddings) + delta)  # inject the perturbation
     outputs = old_train_function(inputs)  # gradient descent step
     K.set_value(embeddings, K.eval(embeddings) - delta)  # remove the perturbation
     return outputs
 def build(self, input_shape):
     self.log_vars = []
     for i in range(2):
         self.log_vars += [
             self.add_weight(name='log_var' + str(i),
                             shape=(1, ),
                             initializer=Constant(0.),
                             trainable=True)
         ]
     super(ConditionalRandomField, self).build(input_shape)
     seq_output_dim, tag_output_dim = input_shape[0][-1], input_shape[1][-1]
     self._trans1 = self.add_weight(name='trans_seq',
                                    shape=(seq_output_dim, seq_output_dim),
                                    initializer='glorot_uniform',
                                    trainable=True)
     self._trans2 = self.add_weight(name='trans_tag',
                                    shape=(tag_output_dim, tag_output_dim),
                                    initializer='glorot_uniform',
                                    trainable=True)
     if self.seq_lr_multiplier != 1:
         K.set_value(self._trans1,
                     K.eval(self._trans1) / self.seq_lr_multiplier)
     if self.tag_lr_multiplier != 1:
         K.set_value(self._trans2,
                     K.eval(self._trans2) / self.tag_lr_multiplier)
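
Examples #3 and #19 presuppose that embeddings, embedding_gradients and old_train_function already exist. A hedged sketch of how they are commonly wired up in graph-mode Keras, patterned on the widely used bert4keras adversarial-training recipe (model, epsilon and the name-based variable lookup are assumptions):

from tensorflow.keras import backend as K

model._make_train_function()               # build the stock training function
old_train_function = model.train_function  # keep it for the real update step

# locate the embedding matrix by variable name (heuristic assumption);
# Example #19 instead collects a list of several embedding variables
embeddings = [v for v in model.trainable_weights
              if 'embeddings' in v.name][0]

# gradients of the training loss w.r.t. the embedding matrix, taking the
# same feeds (inputs, targets, sample weights) as the training function
embedding_gradients = K.function(
    model._feed_inputs + model._feed_targets + model._feed_sample_weights,
    K.gradients(model.total_loss, [embeddings]))
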
Example #5
def run(inputs):
    input_ids_and_segment_ids, labels = inputs
    # ignore bias for simplicity
    loss_grads = []
    activations = []

    def top_k(input, k=1, sorted=True):
        """Top k max pooling
        Args:
            input(ndarray): convolutional feature in height x width x channel format
            k(int): if k==1, it is equal to normal max pooling
            sorted(bool): whether to return the array sorted by channel value
        Returns:
            ndarray: the top-k values along the last axis
            ndarray: the corresponding indices
        """
        ind = np.argpartition(input, -k)[..., -k:]

        def get_entries(input, ind, sorted):
            if len(ind.shape) == 1:
                if sorted:
                    ind = ind[np.argsort(-input[ind])]
                return input[ind], ind
            output, ind = zip(
                *[get_entries(inp, id, sorted) for inp, id in zip(input, ind)])
            return np.array(output), np.array(ind)

        return get_entries(input, ind, sorted)

    for mp, ml, sess in zip(models_penultimate, models_last, sessions):
        with sess.as_default():
            h = mp.predict(input_ids_and_segment_ids)
            logits = ml(tf.convert_to_tensor(h))
            probs = tf.nn.softmax(logits)

            loss_grad = tf.one_hot(labels, 2) - probs
            activations.append(h)
            loss_grads.append(K.eval(loss_grad))
            probs = K.eval(probs)
    # Using probs from last checkpoint
    probs, predicted_labels = top_k(probs, k=1)
    return np.stack(loss_grads, axis=-1), np.stack(
        activations, axis=-1), labels, probs, predicted_labels
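
If the top_k helper above is lifted to module level, a quick usage example with hypothetical data shows what the two returned arrays mean:

import numpy as np

probs = np.array([[0.1, 0.9],
                  [0.7, 0.3]])
values, indices = top_k(probs, k=1)
print(values)   # [[0.9] [0.7]] -> max probability per row
print(indices)  # [[1] [0]]    -> the predicted class labels
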
Example #6
 def train_function(inputs):  # redefine the training function
     outputs = model_outputs(inputs)
     inputs = inputs[:2] + outputs + inputs[3:]
     delta1, delta2 = 0.0, np.random.randn(*K.int_shape(embeddings))
     for _ in range(iters):  # iterate to estimate the perturbation direction
         delta2 = xi * l2_normalize(delta2)
         K.set_value(embeddings, K.eval(embeddings) - delta1 + delta2)
         delta1 = delta2
         delta2 = embedding_gradients(inputs)[0]  # embedding gradients
     delta2 = epsilon * l2_normalize(delta2)
     K.set_value(embeddings, K.eval(embeddings) - delta1 + delta2)
     outputs = old_train_function(inputs)  # gradient descent step
     K.set_value(embeddings, K.eval(embeddings) - delta2)  # remove the perturbation
     return outputs
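
Example #6 is the virtual-adversarial-training variant of Example #3: a short power-iteration loop estimates the direction the loss is most sensitive to before injecting the final perturbation. It presupposes an l2_normalize helper, roughly like this sketch:

import numpy as np

def l2_normalize(x):
    # scale a numpy array to unit L2 norm; the small constant avoids
    # division by zero for an all-zero input
    return x / (np.sqrt((x**2).sum()) + 1e-8)
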
Example #7
def extract_arguments(text):
    """arguments抽取函数  冻结部分Bert 层
    """
    tokens = tokenizer.tokenize(text)  #转化为tokens
    while len(tokens) > 510:  #大于510,pop
        tokens.pop(-2)
    mapping = tokenizer.rematch(text, tokens)  # 进行文本和token的匹配
    token_ids = tokenizer.tokens_to_ids(tokens)  # 找到tokens的ID
    segment_ids = [0] * len(token_ids)  #找到segment的ID
    nodes = model.predict([[token_ids], [segment_ids]])[0]  #模型预测
    trans = K.eval(CRF.trans)  #
    labels = viterbi_decode(nodes, trans)
    arguments, starting = [], False
    for i, label in enumerate(labels):
        if label > 0:
            if label % 2 == 1:
                starting = True
                arguments.append([[i], id2label[(label - 1) // 2]])
            elif starting:
                arguments[-1][0].append(i)
            else:
                starting = False
        else:
            starting = False
    for w, l in arguments:
        if w[-1] == len(tokens) - 1: w[-1] = len(tokens) - 2
    return {
        text[mapping[w[0]][0]:mapping[w[-1]][-1] + 1]: l
        for w, l in arguments
    }
def named_entity_recognize(text):
    """命名实体识别函数
    """
    tokens = tokenizer.tokenize(text)
    while len(tokens) > 512:
        tokens.pop(-2)
    mapping = tokenizer.rematch(text, tokens)
    token_ids = tokenizer.tokens_to_ids(tokens)
    segment_ids = [0] * len(token_ids)
    nodes = model.predict([[token_ids], [segment_ids]])[0]
    trans = K.eval(CRF.trans)
    labels = viterbi_decode(nodes, trans)
    entities, starting = [], False
    for i, label in enumerate(labels):
        if label > 0:
            if label % 2 == 1:
                starting = True
                entities.append([[i], id2label[(label - 1) // 2]])
            elif starting:
                entities[-1][0].append(i)
            else:
                starting = False
        else:
            starting = False

    return [(text[mapping[w[0]][0]:mapping[w[-1]][-1] + 1], l)
            for w, l in entities]
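
viterbi_decode(nodes, trans), used throughout these snippets, returns the highest-scoring tag path given per-token emission scores and the CRF transition matrix. A hedged numpy sketch of such a decoder (the real helper in these projects may differ in signature and in how it treats the [CLS]/[SEP] positions):

import numpy as np

def viterbi_decode(nodes, trans):
    """nodes[t, j]: emission score of tag j at step t;
    trans[i, j]: transition score from tag i to tag j."""
    scores = nodes[0]                      # best score ending in each tag
    paths = [[j] for j in range(nodes.shape[1])]
    for t in range(1, nodes.shape[0]):
        # total[i, j] = best path ending in i, step i -> j, emit at t
        total = scores[:, None] + trans + nodes[t][None, :]
        best_prev = total.argmax(axis=0)
        scores = total.max(axis=0)
        paths = [paths[best_prev[j]] + [j] for j in range(nodes.shape[1])]
    return paths[int(scores.argmax())]
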
Example #9
 def build_ViterbiDecoder(self):
     self.NER = NamedEntityRecognizer(trans=K.eval(self.CRF.trans),
                                      tokenizer=self.tokenizer,
                                      model=self.model,
                                      id2label=self.index2label,
                                      starts=[0],
                                      ends=[0])
Example #10
def extract_arguments(text):
    """命名实体识别函数
    """
    tokens = tokenizer.tokenize(text)
    while len(tokens) > 512:
        tokens.pop(-2)
    token_ids = tokenizer.tokens_to_ids(tokens)
    segment_ids = [0] * len(token_ids)
    nodes = model.predict([[token_ids], [segment_ids]])[0]
    trans = K.eval(CRF.trans)
    labels = viterbi_decode(nodes, trans)[1:-1]
    arguments, starting = [], False
    for token, label in zip(tokens[1:-1], labels):
        if label > 0:
            if label % 2 == 1:
                starting = True
                arguments.append([[token], id2label[(label - 1) // 2]])
            elif starting:
                arguments[-1][0].append(token)
            else:
                starting = False
        else:
            starting = False

    return {tokenizer.decode(w, w): l for w, l in arguments}
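
The decoding loops in Examples #7, #10 and #12 all assume the same label packing: for entity type k, the begin tag is encoded as 2k+1 and the inside tag as 2k+2, so label % 2 == 1 opens a span and id2label[(label - 1) // 2] recovers its type. A tiny worked example with hypothetical data:

id2label = {0: 'PER', 1: 'LOC'}
labels = [0, 1, 2, 0, 3, 4, 4]  # O, B-PER, I-PER, O, B-LOC, I-LOC, I-LOC

entities, starting = [], False
for i, label in enumerate(labels):
    if label > 0:
        if label % 2 == 1:      # odd id: begin a new span
            starting = True
            entities.append([[i], id2label[(label - 1) // 2]])
        elif starting:          # even id: continue the open span
            entities[-1][0].append(i)
        else:
            starting = False
    else:
        starting = False

print(entities)  # [[[1, 2], 'PER'], [[4, 5, 6], 'LOC']]
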
Example #11
 def __init__(self):
     self.CRF = ConditionalRandomField(
         lr_multiplier=config.crf_lr_multiplier)
     self.model = self.get_model()
     self.NER = NamedEntityRecognizer(trans=K.eval(self.CRF.trans),
                                      starts=[0],
                                      ends=[0])
Example #12
def extract_arguments(text, model, CRF):
    """ arguments抽取函数 """
    tokens = tokenizer.tokenize(text)
    while len(tokens) > 510:
        tokens.pop(-2)
    mapping = tokenizer.rematch(text, tokens)
    token_ids = tokenizer.tokens_to_ids(tokens)
    segment_ids = [0] * len(token_ids)
    nodes = model.predict([[token_ids], [segment_ids]])[0]
    trans = K.eval(CRF.trans)
    labels = viterbi_decode(nodes, trans)
    arguments, starting = [], False
    for i, label in enumerate(labels):
        if label > 0:
            if label % 2 == 1:
                starting = True
                arguments.append([[i], id2label[(label - 1) // 2]])
            elif starting:
                arguments[-1][0].append(i)
            else:
                starting = False
        else:
            starting = False

    try:
        return {
            text[mapping[w[0]][0]:mapping[w[-1]][-1] + 1]: l
            for w, l in arguments
        }
    except Exception:
        return {}
Example #13
 def on_epoch_end(self, epoch, logs=None):
     trans = K.eval(CRF.trans)
     NER.trans = trans
     f1, precision, recall = evaluate(valid_data)
     f1_v, precision_v, recall_v = evaluate_valid(dev_data)
     # save the best model
     if normal_train and not cross_train:
         if f1 >= self.best_val_f1:
             self.best_val_f1 = f1
             model.save_weights('./best_model.weights')
         print(
             'valid:  f1: %.5f, precision: %.5f, recall: %.5f, best f1: %.5f\n'
             % (f1, precision, recall, self.best_val_f1))
         if f1_v >= self.best_val_f1_v:
             self.best_val_f1_v = f1_v
             model.save_weights('./best_model_new.weights')
         print(
             'valid:  f1_v: %.5f, precision_v: %.5f, recall_v: %.5f, best f1_v: %.5f\n'
             % (f1_v, precision_v, recall_v, self.best_val_f1_v))
     if cross_train and not normal_train:
         if f1 >= self.best_val_f1:
             self.best_val_f1 = f1
             model.save_weights('./best_model_{}_{}.weights'.format(
                 seed_value, id))
         print(
             'valid:  f1: %.5f, precision: %.5f, recall: %.5f, best f1: %.5f\n'
             % (f1, precision, recall, self.best_val_f1))
         if f1_v >= self.best_val_f1_v:
             self.best_val_f1_v = f1_v
             model.save_weights('./best_model_new_{}_{}.weights'.format(
                 seed_value, id))
         print(
             'valid:  f1_v: %.5f, precision_v: %.5f, recall_v: %.5f, best f1_v: %.5f\n'
             % (f1_v, precision_v, recall_v, self.best_val_f1_v))
Example #14
def extract_arguments(text):
    """Argument extraction function
    """
    # Note the '4000' below: tokenize was not overridden, so the digits in
    # '4000人' stay together as the single token '4000'.
    # text = '雀巢裁员4000人:时代抛弃你时,连招呼都不会打!'
    # tokens = ['[CLS]', '雀', '巢', '裁', '员', '4000', '人', ':', '时', '代', '抛', '弃', '你', '时', ',', '连', '招', '呼', '都', '不', '会', '打', '!', '[SEP]']
    tokens = tokenizer.tokenize(text)
    while len(tokens) > 510:
        tokens.pop(-2)  # drop the second-to-last token

    # resulting mapping: [[], [0], [1], [2], [3], [4, 5, 6, 7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24], []]
    mapping = tokenizer.rematch(text, tokens)

    # token_ids: [101, 7411, 2338, 6161, 1447, 8442, 782, 8038, 3198, 807, 2837, 2461, 872, 3198, 8024, 6825, 2875, 1461, 6963, 679, 833, 2802, 8013, 102]
    token_ids = tokenizer.tokens_to_ids(tokens)

    # segment_ids: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    segment_ids = [0] * len(token_ids)

    # nodes.shape == (24, 435)
    nodes = model.predict([[token_ids], [segment_ids]])[0]

    # trans.shape == (435, 435)
    trans = K.eval(CRF.trans)
    # suppose the predicted labels are
    # [0, 363, 364, 364, 0, 365, 366, 0, 333, 334, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    labels = viterbi_decode(nodes, trans)
    arguments, starting = [], False
    for i, label in enumerate(labels):
        if label > 0:
            if label % 2 == 1:
                starting = True
                arguments.append([[i], id2label[(label - 1) // 2]])
            elif starting:
                arguments[-1][0].append(i)
            else:
                starting = False
        else:
            starting = False
    # How the span extraction works: each predicted label position i indexes
    # into `mapping`, which maps token position i back to its character span
    # in `text` (e.g. token position 1 covers character 0).
    # mapping: [[], [0], [1], [2], [3], [4, 5, 6, 7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24], []]
    # labels:  [0, 363, 364, 364, 0, 365, 366, 0, 333, 334, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    # arguments:
    #     [[[1, 2, 3], ('组织关系-裁员', '裁员方')],
    #      [[5, 6], ('组织关系-裁员', '裁员人数')],
    #      [[8, 9], ('灾害/意外-坍/垮塌', '时间')]]
    # return value:
    #     {'雀巢裁': ('组织关系-裁员', '裁员方'),
    #      '4000人': ('组织关系-裁员', '裁员人数'),
    #      '时代': ('灾害/意外-坍/垮塌', '时间')}
    return {
        text[mapping[w[0]][0]:mapping[w[-1]][-1] + 1]: l
        for w, l in arguments
    }
Example #15
 def build(self, input_shape):
     super(ConditionalRandomField, self).build(input_shape)
     output_dim = input_shape[-1]
     self._trans = self.add_weight(name='trans',
                                   shape=(output_dim, output_dim),
                                   initializer='glorot_uniform')
     if self.lr_multiplier != 1:
         K.set_value(self._trans, K.eval(self._trans) / self.lr_multiplier)
Example #16
 def on_epoch_end(self, epoch, logs=None):
     trans = K.eval(CRF.trans)
     print(trans)
     acc = simple_evaluate(valid_data)
     # save the best model
     if acc >= self.best_val_acc:
         self.best_val_acc = acc
         model.save_weights('./best_model.weights')
     print('acc: %.5f, best acc: %.5f' % (acc, self.best_val_acc))
Example #17
 def build(self, input_shape):
     output_dim = input_shape[-1]
     self.trans = self.add_weight(name='trans',
                                  shape=(output_dim, output_dim),
                                  initializer='glorot_uniform',
                                  trainable=True)
     if self.lr_multiplier != 1:
         K.set_value(self.trans, K.eval(self.trans) / self.lr_multiplier)
         self.trans = self.lr_multiplier * self.trans
 def on_epoch_end(self, epoch, logs=None):
     trans = K.eval(self.CRF.trans)
     f1, precision, recall = evaluate(self.valid_data, self.model, self.CRF, self.i2tag_dict)
     # save the best model
     if f1 >= self.best_val_f1:
         self.best_val_f1 = f1
         self.model.save_weights('./best_model.weights')
     print('valid:  f1: %.5f, precision: %.5f, recall: %.5f, best f1: %.5f\n' %
           (f1, precision, recall, self.best_val_f1))
Example #19
    def train_function(inputs):  # redefine the training function
        grads = embedding_gradients(inputs)  # embedding gradients
        deltas = [
            epsilon * grad / (np.sqrt((grad**2).sum()) + 1e-8)
            for grad in grads
        ]  # compute the perturbations
        # inject the perturbations
        for embedding, delta in zip(embeddings, deltas):
            K.set_value(embedding, K.eval(embedding) + delta)

        outputs = old_train_function(inputs)  # gradient descent step
        # remove the perturbations
        for embedding, delta in zip(embeddings, deltas):
            K.set_value(embedding, K.eval(embedding) - delta)
        return outputs
Example #20
        def apply_ema_weights(self, bias_correction=True):
            """Back up the current model weights, then apply the EMA-averaged
            weights to the model.
            """
            self.old_weights = K.batch_get_value(self.model_weights)
            ema_weights = K.batch_get_value(self.ema_weights)

            if bias_correction:
                iterations = K.eval(self.iterations)
                scale = 1.0 - np.power(self.ema_momentum, iterations)
                ema_weights = [weight / scale for weight in ema_weights]

            K.batch_set_value(zip(self.model_weights, ema_weights))
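
The bias correction above mirrors Adam's: an average accumulated in zero-initialized EMA slots underestimates the true mean early in training, so dividing by 1 - momentum^t rescales it. A matching restore step would swap the backed-up training weights back in after evaluation (a hedged sketch; it assumes apply_ema_weights ran first and populated self.old_weights):

        def reset_old_weights(self):
            """Restore the training weights backed up by apply_ema_weights."""
            K.batch_set_value(zip(self.model_weights, self.old_weights))
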
Example #21
 def on_epoch_end(self, epoch, logs=None):
     trans = K.eval(self.CRF.trans)
     self.NER.trans = trans
     f1, precision, recall = self.evaluate()
     # save the best model
     if f1 >= self.best_val_f1:
         self.best_val_f1 = f1
         self.model.save_weights('best_model_epoch_10.weights')
     print(
         'valid:  f1: %.5f, precision: %.5f, recall: %.5f, best f1: %.5f\n'
         % (f1, precision, recall, self.best_val_f1))
Example #22
 def on_epoch_end(self, epoch, logs=None):
     trans = K.eval(CRF.trans)
     NER.trans = trans
     print(NER.trans)
     f1, precision, recall = evaluate(valid_data)
     # save the best model
     if f1 >= self.best_val_f1:
         self.best_val_f1 = f1
         model.save_weights('./best_model.weights')
     print(
         'valid:  f1: %.5f, precision: %.5f, recall: %.5f, best f1: %.5f\n'
         % (f1, precision, recall, self.best_val_f1))
 def on_epoch_end(self, epoch, logs=None):
     trans = K.eval(self.CRF.trans)
     self.NER.trans = trans
     f1, precision, recall = evaluate(self.valid_data, self.recognize)
     # save the best model
     if f1 >= self.best_val_f1:
         self.best_val_f1 = f1
         self.model.save_weights(self.model_path)
     print(
         'valid:  f1: %.5f, precision: %.5f, recall: %.5f, best f1: %.5f \n'
         % (f1, precision, recall, self.best_val_f1))
Example #24
 def on_epoch_end(self, epoch, logs=None):
     trans = K.eval(CRF.trans)
     NER.trans = trans
     f1, precision, recall = evaluate(self.valid_data)
     # save the best model
     if f1 >= self.best_val_f1:
         self.best_val_f1 = f1
         model.save_weights('./best_bilstm_model_{}.weights'.format(self.mode))
     logging.info(
         'valid:  f1: %.5f, precision: %.5f, recall: %.5f, best f1: %.5f\n' %
         (f1, precision, recall, self.best_val_f1)
     )
Example #25
    def on_epoch_end(self, epoch, logs=None):
        lr = K.get_value(self.model.optimizer.lr)
        trans = K.eval(self.CRF.trans)
        self.NER.trans = trans
        f1, precision, recall = evaluate(self.valid_data, self.NER, self.model)
        if f1 >= self.best_val_f1:
            # new best f1: update it and save the model
            self.best_val_f1 = f1
            self.model.save_weights(self.model_save_to)
        print('F1: %.5f, P: %.5f, R: %.5f, best f1: %.5f, lr: %.6f\n' %
              (f1, precision, recall, self.best_val_f1, lr))

        if True:  # LR schedule 0: decay the LR by 30% each epoch; once it nears zero, keep it fixed
            if lr * 0.7 > 0.000001:
                K.set_value(self.model.optimizer.lr, lr * 0.7)
        if False:  # LR schedule 1: decay by 30% each epoch; once it nears zero, reset it
            if lr * 0.7 >= 0.000001:
                K.set_value(self.model.optimizer.lr, lr * 0.7)
            else:
                K.set_value(self.model.optimizer.lr, 1e-4)
        if False:  # LR schedule 2: if f1 drops, decay the LR; once it nears zero, reset it
            if f1 >= self.pre_f1:
                # f1 improved on the previous epoch: reset the early-stop,
                # scheduler and bad_count counters
                self.scheduler_patience = 1
                self.early_stop_patience = 3
                self.bad_count = 3
            else:
                self.scheduler_patience -= 1
                self.early_stop_patience -= 1
                self.bad_count -= 1
                if self.early_stop_patience == 0:
                    exit()  # early stop
                # f1 is worse than last epoch: decay the LR
                if self.scheduler_patience == 0:
                    # if the LR has become too low, reset it instead
                    if lr * 0.7 >= 0.000001:
                        K.set_value(self.model.optimizer.lr, lr * 0.7)
                    else:
                        K.set_value(self.model.optimizer.lr, 1e-4)
                    # after adjusting, reset the patience counter
                    self.scheduler_patience = 1
                if self.bad_count == 0:
                    K.set_value(self.model.optimizer.lr, 1e-4)
                    # after adjusting, reset the bad_count counter
                    self.bad_count = 3

            # update pre_f1
            self.pre_f1 = f1
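
Under schedule 0 the learning rate follows lr_n = lr_0 * 0.7**n until one more step would fall below 1e-6; from an assumed lr_0 = 1e-4 that takes 12 decay steps and ends near 1.4e-6, since 0.7**12 ≈ 0.0138. A quick check:

lr = 1e-4  # assumed initial learning rate
steps = 0
while lr * 0.7 > 0.000001:  # same condition as schedule 0 above
    lr *= 0.7
    steps += 1
print(steps, '%.2e' % lr)  # 12 1.38e-06
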
def named_entity_recognize(text, model, CRF, id2class):
    """命名实体识别函数
    """
    tokens = tokenizer.tokenize(text)

    while len(tokens) > 512:
        tokens.pop(-2)
    token_ids = tokenizer.tokens_to_ids(tokens)
    segment_ids = [0] * len(token_ids)
    nodes = model.predict([[token_ids], [segment_ids]])[0]
    trans = K.eval(CRF.trans)
    labels = ViterbiDecoder(trans).decode(nodes)[1:-1]
    return labels
Example #27
    def on_epoch_end(self, epoch, logs=None):
        trans = K.eval(CRF.trans)
        NER.trans = trans

        f1, precision, recall = evaluate(valid_data)
        # save the best model
        if f1 >= self.best_val_f1:
            self.best_val_f1 = f1
            model.save_weights('../medical_ner/' + str(self.best_val_f1) +
                               'medical_ner.weights')
        print(
            'valid:  f1: %.5f, precision: %.5f, recall: %.5f, best f1: %.5f\n'
            % (f1, precision, recall, self.best_val_f1))
        f1, precision, recall = evaluate(test_data)
        print('test:  f1: %.5f, precision: %.5f, recall: %.5f\n' %
              (f1, precision, recall))
    def on_epoch_end(self, epoch, logs=None):
        trans = K.eval(CRF.trans)
        NER.trans = trans
        f1, precision, recall = evaluate(valid_data)
        # save the best model
        if f1 >= self.best_val_f1:
            self.best_val_f1 = f1
            model.save_weights('./model/electra_base_ner_best_model.weights')

        test_f1, test_precision, test_recall = evaluate(test_data)

        print(
            'valid:  f1: %.5f, precision: %.5f, recall: %.5f, best f1: %.5f\n'
            % (f1, precision, recall, self.best_val_f1))
        print('test:  f1: %.5f, precision: %.5f, recall: %.5f\n' %
              (test_f1, test_precision, test_recall))
Example #29
    def on_epoch_end(self, epoch, logs=None):
        # self.model is auto set by keras
        yt, yp = [], []
        trans = K.eval(CRF.trans)
        NER.trans = trans
        pred = self.smodel.predict(self.X, batch_size=16)

        for i, yseq in enumerate(self.Y):
            labels = NER.decode(pred[i])
            yt.append([self.tags[z] for z in yseq])    # ground-truth tags
            yp.append([self.tags[z] for z in labels])  # predicted tags

        f1 = f1_score(yt, yp)
        self.best_f1 = max(self.best_f1, f1)
        accu = accuracy_score(yt, yp)
        print('\naccu: %.4f  F1: %.4f  BestF1: %.4f\n' %
              (accu, f1, self.best_f1))
        print(classification_report(yt, yp))
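
The f1_score, accuracy_score and classification_report in Example #29 are presumably seqeval's entity-level metrics (an assumption; the snippet shows no imports), which score whole tag spans rather than individual tokens:

from seqeval.metrics import f1_score

y_true = [['B-PER', 'I-PER', 'O', 'B-LOC']]
y_pred = [['B-PER', 'I-PER', 'O', 'O']]
print(f1_score(y_true, y_pred))  # 0.667 -> one of the two true spans matched
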
Example #30
def word_segment(text):
    """分词函数
    """
    tokens = tokenizer.tokenize(text)
    while len(tokens) > 512:
        tokens.pop(-2)
    token_ids = tokenizer.tokens_to_ids(tokens)
    segment_ids = [0] * len(token_ids)
    nodes = model.predict([[token_ids], [segment_ids]])[0]
    trans = K.eval(CRF.trans)
    labels = viterbi_decode(nodes, trans)[1:-1]
    words = []
    for token, label in zip(tokens[1:-1], labels):
        if label < 2 or len(words) == 0:
            words.append([token])
        else:
            words[-1].append(token)
    return [tokenizer.decode(w, w).replace(' ', '') for w in words]
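
The segmentation decode in Example #30 assumes, judging from the loop, that labels 0 and 1 start a new word while labels >= 2 extend the current one. A worked example with hypothetical data:

tokens = ['我', '喜', '欢', '北', '京']
labels = [0, 0, 2, 1, 2]  # hypothetical model output

words = []
for token, label in zip(tokens, labels):
    if label < 2 or len(words) == 0:  # low label id starts a new word
        words.append([token])
    else:                             # high label id extends the last word
        words[-1].append(token)
print([''.join(w) for w in words])  # ['我', '喜欢', '北京']
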