def build(self, input_shape):
    super(MaximumEntropyMarkovModel, self).build(input_shape)
    output_dim = input_shape[-1]
    if self.hidden_dim is None:
        self._trans = self.add_weight(name='trans',
                                      shape=(output_dim, output_dim),
                                      initializer='glorot_uniform',
                                      trainable=True)
        if self.lr_multiplier != 1:
            K.set_value(self._trans, K.eval(self._trans) / self.lr_multiplier)
    else:
        self._l_trans = self.add_weight(name='l_trans',
                                        shape=(output_dim, self.hidden_dim),
                                        initializer='glorot_uniform',
                                        trainable=True)
        self._r_trans = self.add_weight(name='r_trans',
                                        shape=(output_dim, self.hidden_dim),
                                        initializer='glorot_uniform',
                                        trainable=True)
        if self.lr_multiplier != 1:
            K.set_value(self._l_trans, K.eval(self._l_trans) / self.lr_multiplier)
            K.set_value(self._r_trans, K.eval(self._r_trans) / self.lr_multiplier)
def build(self, input_shape):
    output_dim = input_shape[-1]
    if not isinstance(output_dim, int):  # TF1 may return a Dimension object
        output_dim = output_dim.value
    if self.hidden_dim is None:
        self.trans = self.add_weight(name='trans',
                                     shape=(output_dim, output_dim),
                                     initializer='glorot_uniform',
                                     trainable=True)
        if self.lr_multiplier != 1:
            # store trans / multiplier, then rescale in-graph: this boosts
            # the effective learning rate of the weight by the multiplier
            K.set_value(self.trans, K.eval(self.trans) / self.lr_multiplier)
            self.trans = self.lr_multiplier * self.trans
    else:
        self.l_trans = self.add_weight(name='l_trans',
                                       shape=(output_dim, self.hidden_dim),
                                       initializer='glorot_uniform',
                                       trainable=True)
        self.r_trans = self.add_weight(name='r_trans',
                                       shape=(output_dim, self.hidden_dim),
                                       initializer='glorot_uniform',
                                       trainable=True)
        if self.lr_multiplier != 1:
            K.set_value(self.l_trans, K.eval(self.l_trans) / self.lr_multiplier)
            self.l_trans = self.lr_multiplier * self.l_trans
            K.set_value(self.r_trans, K.eval(self.r_trans) / self.lr_multiplier)
            self.r_trans = self.lr_multiplier * self.r_trans
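
# Why the divide-then-multiply above changes the effective learning rate:
# if the stored variable is w' = w / m and the graph computes w = m * w',
# the chain rule gives dL/dw' = m * dL/dw, so one SGD step moves the
# effective weight w by m**2 * lr * dL/dw. A minimal numpy sketch
# (hypothetical names, plain SGD only; adaptive optimizers scale
# differently):
import numpy as np

lr, m = 0.1, 4.0
grad = lambda w: 2 * w                   # dL/dw for L = w**2
w0 = 1.0

# plain step on w: moves by lr * dL/dw = 0.2
w_plain = w0 - lr * grad(w0)

# reparameterized step: store w' = w0 / m, use w = m * w' in the graph
w_stored = w0 / m
w_stored -= lr * m * grad(m * w_stored)  # dL/dw' = m * dL/dw
w_effective = m * w_stored               # moved m**2 = 16 times as far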
def train_function(inputs):  # redefine the train function
    grads = embedding_gradients(inputs)[0]  # gradient w.r.t. the embedding matrix
    delta = epsilon * grads / (np.sqrt((grads**2).sum()) + 1e-8)  # compute the perturbation
    K.set_value(embeddings, K.eval(embeddings) + delta)  # inject the perturbation
    outputs = old_train_function(inputs)  # gradient descent step
    K.set_value(embeddings, K.eval(embeddings) - delta)  # remove the perturbation
    return outputs
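
# A hedged sketch of how a train_function like the one above is usually
# wired up in Keras 2.x (it mirrors the bert4keras adversarial-training
# recipe; `adversarial_training` and `embedding_name` are illustrative
# names, not part of the snippets here):
import numpy as np
from keras import backend as K

def adversarial_training(model, embedding_name, epsilon=1.0):
    """Swap in an FGM train step: perturb the embedding matrix along the
    normalized loss gradient, run the usual update, then undo the shift."""
    model._make_train_function()
    old_train_function = model.train_function

    # locate the embedding weight (assumption: an Embedding layer exists
    # under `embedding_name`)
    embeddings = model.get_layer(embedding_name).embeddings

    # symbolic gradient of the compiled training loss w.r.t. the embeddings
    gradients = K.gradients(model.total_loss, [embeddings])
    inputs = (model._feed_inputs + model._feed_targets +
              model._feed_sample_weights)
    embedding_gradients = K.function(inputs, gradients)

    def train_function(inputs):
        grads = embedding_gradients(inputs)[0]
        delta = epsilon * grads / (np.sqrt((grads**2).sum()) + 1e-8)
        K.set_value(embeddings, K.eval(embeddings) + delta)
        outputs = old_train_function(inputs)
        K.set_value(embeddings, K.eval(embeddings) - delta)
        return outputs

    model.train_function = train_function  # replace the original step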
def build(self, input_shape):
    self.log_vars = []
    for i in range(2):
        self.log_vars += [
            self.add_weight(name='log_var' + str(i),
                            shape=(1,),
                            initializer=Constant(0.),
                            trainable=True)
        ]
    super(ConditionalRandomField, self).build(input_shape)
    seq_output_dim, tag_output_dim = input_shape[0][-1], input_shape[1][-1]
    self._trans1 = self.add_weight(name='trans_seq',
                                   shape=(seq_output_dim, seq_output_dim),
                                   initializer='glorot_uniform',
                                   trainable=True)
    self._trans2 = self.add_weight(name='trans_tag',
                                   shape=(tag_output_dim, tag_output_dim),
                                   initializer='glorot_uniform',
                                   trainable=True)
    if self.seq_lr_multiplier != 1:
        K.set_value(self._trans1, K.eval(self._trans1) / self.seq_lr_multiplier)
    if self.tag_lr_multiplier != 1:
        K.set_value(self._trans2, K.eval(self._trans2) / self.tag_lr_multiplier)
def run(inputs):
    input_ids_and_segment_ids, labels = inputs  # ignore bias for simplicity
    loss_grads = []
    activations = []

    def top_k(input, k=1, sorted=True):
        """Top-k max pooling.

        Args:
            input (ndarray): convolutional feature in height x width x channel format
            k (int): if k == 1, this is equivalent to normal max pooling
            sorted (bool): whether to return the array sorted by channel value

        Returns:
            ndarray: k x (height x width)
            ndarray: k
        """
        ind = np.argpartition(input, -k)[..., -k:]

        def get_entries(input, ind, sorted):
            if len(ind.shape) == 1:
                if sorted:
                    ind = ind[np.argsort(-input[ind])]
                return input[ind], ind
            output, ind = zip(
                *[get_entries(inp, id, sorted) for inp, id in zip(input, ind)])
            return np.array(output), np.array(ind)

        return get_entries(input, ind, sorted)

    for mp, ml, sess in zip(models_penultimate, models_last, sessions):
        with sess.as_default():
            h = mp.predict(input_ids_and_segment_ids)  # penultimate-layer activations
            logits = ml(tf.convert_to_tensor(h))
            probs = tf.nn.softmax(logits)
            loss_grad = tf.one_hot(labels, 2) - probs  # CE-loss gradient w.r.t. logits
            activations.append(h)
            loss_grads.append(K.eval(loss_grad))

    probs = K.eval(probs)  # using probs from the last checkpoint
    probs, predicted_labels = top_k(probs, k=1)
    return (np.stack(loss_grads, axis=-1), np.stack(activations, axis=-1),
            labels, probs, predicted_labels)
def train_function(inputs):  # redefine the train function
    outputs = model_outputs(inputs)
    inputs = inputs[:2] + outputs + inputs[3:]
    delta1, delta2 = 0.0, np.random.randn(*K.int_shape(embeddings))
    for _ in range(iters):  # iterate to estimate the perturbation
        delta2 = xi * l2_normalize(delta2)
        K.set_value(embeddings, K.eval(embeddings) - delta1 + delta2)
        delta1 = delta2
        delta2 = embedding_gradients(inputs)[0]  # gradient w.r.t. the embeddings
    delta2 = epsilon * l2_normalize(delta2)
    K.set_value(embeddings, K.eval(embeddings) - delta1 + delta2)
    outputs = old_train_function(inputs)  # gradient descent step
    K.set_value(embeddings, K.eval(embeddings) - delta2)  # remove the perturbation
    return outputs
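
# `l2_normalize` above is not shown in these snippets; a plausible numpy
# helper, consistent with the normalization used in the FGM version earlier:
import numpy as np

def l2_normalize(x):
    return x / (np.sqrt((x**2).sum()) + 1e-8)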
def extract_arguments(text): """arguments抽取函数 冻结部分Bert 层 """ tokens = tokenizer.tokenize(text) #转化为tokens while len(tokens) > 510: #大于510,pop tokens.pop(-2) mapping = tokenizer.rematch(text, tokens) # 进行文本和token的匹配 token_ids = tokenizer.tokens_to_ids(tokens) # 找到tokens的ID segment_ids = [0] * len(token_ids) #找到segment的ID nodes = model.predict([[token_ids], [segment_ids]])[0] #模型预测 trans = K.eval(CRF.trans) # labels = viterbi_decode(nodes, trans) arguments, starting = [], False for i, label in enumerate(labels): if label > 0: if label % 2 == 1: starting = True arguments.append([[i], id2label[(label - 1) // 2]]) elif starting: arguments[-1][0].append(i) else: starting = False else: starting = False for w, l in arguments: if w[-1] == len(tokens) - 1: w[-1] = len(tokens) - 2 return { text[mapping[w[0]][0]:mapping[w[-1]][-1] + 1]: l for w, l in arguments }
def named_entity_recognize(text): """命名实体识别函数 """ tokens = tokenizer.tokenize(text) while len(tokens) > 512: tokens.pop(-2) mapping = tokenizer.rematch(text, tokens) token_ids = tokenizer.tokens_to_ids(tokens) segment_ids = [0] * len(token_ids) nodes = model.predict([[token_ids], [segment_ids]])[0] trans = K.eval(CRF.trans) labels = viterbi_decode(nodes, trans) entities, starting = [], False for i, label in enumerate(labels): if label > 0: if label % 2 == 1: starting = True entities.append([[i], id2label[(label - 1) // 2]]) elif starting: entities[-1][0].append(i) else: starting = False else: starting = False return [(text[mapping[w[0]][0]:mapping[w[-1]][-1] + 1], l) for w, l in entities]
def build_ViterbiDecoder(self):
    self.NER = NamedEntityRecognizer(trans=K.eval(self.CRF.trans),
                                     tokenizer=self.tokenizer,
                                     model=self.model,
                                     id2label=self.index2label,
                                     starts=[0],
                                     ends=[0])
def extract_arguments(text): """命名实体识别函数 """ tokens = tokenizer.tokenize(text) while len(tokens) > 512: tokens.pop(-2) token_ids = tokenizer.tokens_to_ids(tokens) segment_ids = [0] * len(token_ids) nodes = model.predict([[token_ids], [segment_ids]])[0] trans = K.eval(CRF.trans) labels = viterbi_decode(nodes, trans)[1:-1] arguments, starting = [], False for token, label in zip(tokens[1:-1], labels): if label > 0: if label % 2 == 1: starting = True arguments.append([[token], id2label[(label - 1) // 2]]) elif starting: arguments[-1][0].append(token) else: starting = False else: starting = False return {tokenizer.decode(w, w): l for w, l in arguments}
def __init__(self):
    self.CRF = ConditionalRandomField(
        lr_multiplier=config.crf_lr_multiplier)
    self.model = self.get_model()
    self.NER = NamedEntityRecognizer(trans=K.eval(self.CRF.trans),
                                     starts=[0],
                                     ends=[0])
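
# Note: K.eval(self.CRF.trans) above snapshots the transition matrix as it
# is at construction time, while the CRF transitions keep training. That is
# why the on_epoch_end callbacks below re-assign NER.trans from a fresh
# K.eval(CRF.trans) after every epoch.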
def extract_arguments(text, model, CRF):
    """Argument extraction function."""
    tokens = tokenizer.tokenize(text)
    while len(tokens) > 510:
        tokens.pop(-2)
    mapping = tokenizer.rematch(text, tokens)
    token_ids = tokenizer.tokens_to_ids(tokens)
    segment_ids = [0] * len(token_ids)
    nodes = model.predict([[token_ids], [segment_ids]])[0]
    trans = K.eval(CRF.trans)
    labels = viterbi_decode(nodes, trans)
    arguments, starting = [], False
    for i, label in enumerate(labels):
        if label > 0:
            if label % 2 == 1:
                starting = True
                arguments.append([[i], id2label[(label - 1) // 2]])
            elif starting:
                arguments[-1][0].append(i)
            else:
                starting = False
        else:
            starting = False
    try:
        return {
            text[mapping[w[0]][0]:mapping[w[-1]][-1] + 1]: l
            for w, l in arguments
        }
    except Exception:  # spans on [CLS]/[SEP] have empty mapping entries
        return {}
def on_epoch_end(self, epoch, logs=None):
    trans = K.eval(CRF.trans)
    NER.trans = trans
    f1, precision, recall = evaluate(valid_data)
    f1_v, precision_v, recall_v = evaluate_valid(dev_data)
    # save the best model
    if normal_train and not cross_train:
        if f1 >= self.best_val_f1:
            self.best_val_f1 = f1
            model.save_weights('./best_model.weights')
        print(
            'valid: f1: %.5f, precision: %.5f, recall: %.5f, best f1: %.5f\n'
            % (f1, precision, recall, self.best_val_f1))
        if f1_v >= self.best_val_f1_v:
            self.best_val_f1_v = f1_v
            model.save_weights('./best_model_new.weights')
        print(
            'valid: f1_v: %.5f, precision_v: %.5f, recall_v: %.5f, best f1_v: %.5f\n'
            % (f1_v, precision_v, recall_v, self.best_val_f1_v))
    if cross_train and not normal_train:
        if f1 >= self.best_val_f1:
            self.best_val_f1 = f1
            model.save_weights('./best_model_{}_{}.weights'.format(
                seed_value, id))
        print(
            'valid: f1: %.5f, precision: %.5f, recall: %.5f, best f1: %.5f\n'
            % (f1, precision, recall, self.best_val_f1))
        if f1_v >= self.best_val_f1_v:
            self.best_val_f1_v = f1_v
            model.save_weights('./best_model_new_{}_{}.weights'.format(
                seed_value, id))
        print(
            'valid: f1_v: %.5f, precision_v: %.5f, recall_v: %.5f, best f1_v: %.5f\n'
            % (f1_v, precision_v, recall_v, self.best_val_f1_v))
def extract_arguments(text): """arguments抽取函数 """ #注意这个4000 '''并没有重写tokenize,所以注意4000人''' #text='雀巢裁员4000人:时代抛弃你时,连招呼都不会打!' #tokens ['[CLS]', '雀','巢', '裁', '员', '4000', '人',':','时', '代', '抛', '弃', '你', '时', ',', '连', '招', '呼', '都', '不', '会', '打', '!', '[SEP]'] tokens = tokenizer.tokenize(text) while len(tokens) > 510: tokens.pop(-2) #把倒数第二个词删掉 #得到映射[[], [0], [1], [2], [3], [4, 5, 6, 7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24], []] mapping = tokenizer.rematch(text, tokens) #输入[101, 7411, 2338, 6161, 1447, 8442, 782, 8038, 3198, 807, 2837, 2461, 872, 3198, 8024, 6825, 2875, 1461, 6963, 679, 833, 2802, 8013, 102] token_ids = tokenizer.tokens_to_ids(tokens) #输入[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] segment_ids = [0] * len(token_ids) #nodes.shape (24, 435) nodes = model.predict([[token_ids], [segment_ids]])[0] #(435, 435) trans = K.eval(CRF.trans) #假设预测labels=[0, 363, 364, 364, 0, 365, 366, 0, 333, 334, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] labels = viterbi_decode(nodes, trans) arguments, starting = [], False for i, label in enumerate(labels): if label > 0: ch = text[mapping[i][0]:mapping[i][-1] + 1] if label % 2 == 1: starting = True arguments.append([[i], id2label[(label - 1) // 2]]) elif starting: arguments[-1][0].append(i) else: starting = False else: starting = False #原理,预测label的位置1, mapping中已经把位置编码好了,[0]对应1 #映射[[], [0], [1], [2], [3], [4, 5, 6, 7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24], []] #labels=[0, 363, 364, 364, 0, 365, 366, 0, 333, 334, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] #arguments为 # [[[1,2,3], ('组织关系-裁员', '裁员方')], # [[5,6], ('组织关系-裁员', '裁员人数')], # [[8,9], ('灾害/意外-坍/垮塌', '时间')], ] #return # { # '雀巢裁': ('组织关系-裁员', '裁员方') # '4000人': ('组织关系-裁员', '裁员人数'), # '时代': ('灾害/意外-坍/垮塌', '时间'), # } return { text[mapping[w[0]][0]:mapping[w[-1]][-1] + 1]: l for w, l in arguments }
def build(self, input_shape):
    super(ConditionalRandomField, self).build(input_shape)
    output_dim = input_shape[-1]
    self._trans = self.add_weight(name='trans',
                                  shape=(output_dim, output_dim),
                                  initializer='glorot_uniform')
    if self.lr_multiplier != 1:
        K.set_value(self._trans, K.eval(self._trans) / self.lr_multiplier)
def on_epoch_end(self, epoch, logs=None):
    trans = K.eval(CRF.trans)
    print(trans)
    acc = simple_evaluate(valid_data)
    # save the best model
    if acc >= self.best_val_acc:
        self.best_val_acc = acc
        model.save_weights('./best_model.weights')
    print('acc: %.5f, best acc: %.5f' % (acc, self.best_val_acc))
def build(self, input_shape):
    output_dim = input_shape[-1]
    self.trans = self.add_weight(name='trans',
                                 shape=(output_dim, output_dim),
                                 initializer='glorot_uniform',
                                 trainable=True)
    if self.lr_multiplier != 1:
        K.set_value(self.trans, K.eval(self.trans) / self.lr_multiplier)
        self.trans = self.lr_multiplier * self.trans
def on_epoch_end(self, epoch, logs=None):
    trans = K.eval(self.CRF.trans)
    f1, precision, recall = evaluate(self.valid_data, self.model, self.CRF,
                                     self.i2tag_dict)
    # save the best model
    if f1 >= self.best_val_f1:
        self.best_val_f1 = f1
        self.model.save_weights('./best_model.weights')
    print('valid: f1: %.5f, precision: %.5f, recall: %.5f, best f1: %.5f\n'
          % (f1, precision, recall, self.best_val_f1))
def train_function(inputs):  # redefine the train function
    grads = embedding_gradients(inputs)  # gradients w.r.t. each embedding matrix
    deltas = [
        epsilon * grad / (np.sqrt((grad**2).sum()) + 1e-8)
        for grad in grads
    ]  # compute one perturbation per embedding
    # inject the perturbations
    for embedding, delta in zip(embeddings, deltas):
        K.set_value(embedding, K.eval(embedding) + delta)
    outputs = old_train_function(inputs)  # gradient descent step
    # remove the perturbations
    for embedding, delta in zip(embeddings, deltas):
        K.set_value(embedding, K.eval(embedding) - delta)
    return outputs
def apply_ema_weights(self, bias_correction=True):
    """Back up the current model weights, then apply the EMA-averaged
    weights to the model."""
    self.old_weights = K.batch_get_value(self.model_weights)
    ema_weights = K.batch_get_value(self.ema_weights)
    if bias_correction:
        iterations = K.eval(self.iterations)
        scale = 1.0 - np.power(self.ema_momentum, iterations)
        ema_weights = [weight / scale for weight in ema_weights]
    K.batch_set_value(zip(self.model_weights, ema_weights))
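
# A hedged sketch of the companion method that usually pairs with
# apply_ema_weights, restoring the backup it made. The name
# `reset_old_weights` follows the common EMA-callback pattern and is an
# assumption here:
def reset_old_weights(self):
    """Restore the raw (non-EMA) weights backed up by apply_ema_weights."""
    K.batch_set_value(zip(self.model_weights, self.old_weights))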
def on_epoch_end(self, epoch, logs=None):
    trans = K.eval(self.CRF.trans)
    self.NER.trans = trans
    f1, precision, recall = self.evaluate()
    # save the best model
    if f1 >= self.best_val_f1:
        self.best_val_f1 = f1
        self.model.save_weights('best_model_epoch_10.weights')
    print(
        'valid: f1: %.5f, precision: %.5f, recall: %.5f, best f1: %.5f\n'
        % (f1, precision, recall, self.best_val_f1))
def on_epoch_end(self, epoch, logs=None):
    trans = K.eval(CRF.trans)
    NER.trans = trans
    print(NER.trans)
    f1, precision, recall = evaluate(valid_data)
    # save the best model
    if f1 >= self.best_val_f1:
        self.best_val_f1 = f1
        model.save_weights('./best_model.weights')
    print(
        'valid: f1: %.5f, precision: %.5f, recall: %.5f, best f1: %.5f\n'
        % (f1, precision, recall, self.best_val_f1))
def on_epoch_end(self, epoch, logs=None):
    trans = K.eval(self.CRF.trans)
    self.NER.trans = trans
    f1, precision, recall = evaluate(self.valid_data, self.recognize)
    # save the best model
    if f1 >= self.best_val_f1:
        self.best_val_f1 = f1
        self.model.save_weights(self.model_path)
    print(
        'valid: f1: %.5f, precision: %.5f, recall: %.5f, best f1: %.5f \n'
        % (f1, precision, recall, self.best_val_f1))
def on_epoch_end(self, epoch, logs=None):
    trans = K.eval(CRF.trans)
    NER.trans = trans
    f1, precision, recall = evaluate(self.valid_data)
    # save the best model
    if f1 >= self.best_val_f1:
        self.best_val_f1 = f1
        model.save_weights('./best_bilstm_model_{}.weights'.format(self.mode))
    logging.info(
        'valid: f1: %.5f, precision: %.5f, recall: %.5f, best f1: %.5f\n'
        % (f1, precision, recall, self.best_val_f1))
def on_epoch_end(self, epoch, logs=None):
    lr = K.get_value(self.model.optimizer.lr)
    trans = K.eval(self.CRF.trans)
    self.NER.trans = trans
    f1, precision, recall = evaluate(self.valid_data, self.NER, self.model)
    if f1 >= self.best_val_f1:  # new best f1: update it and save the model
        self.best_val_f1 = f1
        self.model.save_weights(self.model_save_to)
    print('F1: %.5f, P: %.5f, R: %.5f, best f1: %.5f, lr: %.6f\n'
          % (f1, precision, recall, self.best_val_f1, lr))
    if True:
        # LR schedule 0: decay the LR by 30% each epoch; once it approaches
        # zero, keep it fixed
        if lr * 0.7 > 0.000001:
            K.set_value(self.model.optimizer.lr, lr * 0.7)
    if False:
        # LR schedule 1: decay the LR by 30% each epoch; once it approaches
        # zero, reset it
        if lr * 0.7 >= 0.000001:
            K.set_value(self.model.optimizer.lr, lr * 0.7)
        else:
            K.set_value(self.model.optimizer.lr, 1e-4)
    if False:
        # LR schedule 2: if f1 drops, decay the LR; once it approaches zero,
        # reset it
        if f1 >= self.pre_f1:
            # f1 beat the previous epoch: reset the early-stop, bad_count
            # and patience counters
            self.scheduler_patience = 1
            self.early_stop_patience = 3
            self.bad_count = 3
        else:
            self.scheduler_patience -= 1
            self.early_stop_patience -= 1
            self.bad_count -= 1
        if self.early_stop_patience == 0:
            pass  # early stopping disabled (was: exit())
        # f1 is worse than the previous epoch: decay the LR
        if self.scheduler_patience == 0:
            # if the LR is already too low, reset it instead
            if lr * 0.7 >= 0.000001:
                K.set_value(self.model.optimizer.lr, lr * 0.7)
            else:
                K.set_value(self.model.optimizer.lr, 1e-4)
            # after adjusting, reset the patience counter
            self.scheduler_patience = 1
        if self.bad_count == 0:
            K.set_value(self.model.optimizer.lr, 1e-4)
            # after adjusting, reset the bad_count counter
            self.bad_count = 3
    # update pre_f1
    self.pre_f1 = f1
def named_entity_recognize(text, model, CRF, id2class):
    """Named entity recognition function."""
    tokens = tokenizer.tokenize(text)
    while len(tokens) > 512:
        tokens.pop(-2)
    token_ids = tokenizer.tokens_to_ids(tokens)
    segment_ids = [0] * len(token_ids)
    nodes = model.predict([[token_ids], [segment_ids]])[0]
    trans = K.eval(CRF.trans)
    labels = ViterbiDecoder(trans).decode(nodes)[1:-1]
    return labels
def on_epoch_end(self, epoch, logs=None):
    trans = K.eval(CRF.trans)
    NER.trans = trans
    f1, precision, recall = evaluate(valid_data)
    # save the best model
    if f1 >= self.best_val_f1:
        self.best_val_f1 = f1
        model.save_weights('../medical_ner/' + str(self.best_val_f1) +
                           'medical_ner.weights')
    print(
        'valid: f1: %.5f, precision: %.5f, recall: %.5f, best f1: %.5f\n'
        % (f1, precision, recall, self.best_val_f1))
    f1, precision, recall = evaluate(test_data)
    print('test: f1: %.5f, precision: %.5f, recall: %.5f\n'
          % (f1, precision, recall))
def on_epoch_end(self, epoch, logs=None):
    trans = K.eval(CRF.trans)
    NER.trans = trans
    f1, precision, recall = evaluate(valid_data)
    # save the best model
    if f1 >= self.best_val_f1:
        self.best_val_f1 = f1
        model.save_weights('./model/electra_base_ner_best_model.weights')
    test_f1, test_precision, test_recall = evaluate(test_data)
    print(
        'valid: f1: %.5f, precision: %.5f, recall: %.5f, best f1: %.5f\n'
        % (f1, precision, recall, self.best_val_f1))
    print('test: f1: %.5f, precision: %.5f, recall: %.5f\n'
          % (test_f1, test_precision, test_recall))
def on_epoch_end(self, epoch, logs=None):
    # self.model is auto-set by Keras
    yt, yp = [], []
    trans = K.eval(CRF.trans)
    NER.trans = trans
    pred = self.smodel.predict(self.X, batch_size=16)
    for i, yseq in enumerate(self.Y):
        labels = NER.decode(pred[i])
        yp.append([self.tags[z] for z in labels])  # predicted tag sequence
        yt.append([self.tags[z] for z in yseq])    # ground-truth tag sequence
    f1 = f1_score(yt, yp)
    self.best_f1 = max(self.best_f1, f1)
    accu = accuracy_score(yt, yp)
    print('\naccu: %.4f F1: %.4f BestF1: %.4f\n'
          % (accu, f1, self.best_f1))
    print(classification_report(yt, yp))
def word_segment(text):
    """Word segmentation function."""
    tokens = tokenizer.tokenize(text)
    while len(tokens) > 512:
        tokens.pop(-2)
    token_ids = tokenizer.tokens_to_ids(tokens)
    segment_ids = [0] * len(token_ids)
    nodes = model.predict([[token_ids], [segment_ids]])[0]
    trans = K.eval(CRF.trans)
    labels = viterbi_decode(nodes, trans)[1:-1]
    words = []
    for token, label in zip(tokens[1:-1], labels):
        if label < 2 or len(words) == 0:  # labels 0/1 start a new word
            words.append([token])
        else:
            words[-1].append(token)
    return [tokenizer.decode(w, w).replace(' ', '') for w in words]