def build(self, input_shape):
    """Create the CRF's label-transition matrix the first time the layer runs.

    The matrix is (num_labels, num_labels); entry [i, j] scores a transition
    from label i to label j.
    """
    super(ConditionalRandomField, self).build(input_shape)
    num_labels = input_shape[-1]
    self._trans = self.add_weight(
        name='crf_trans',
        shape=(num_labels, num_labels),
        initializer='glorot_uniform',
        trainable=True,
    )
    # Pre-divide the initial value so that an external learning-rate
    # multiplier restores the effective weight magnitude.
    if self.lr_multiplier != 1:
        K.set_value(self._trans, K.eval(self._trans) / self.lr_multiplier)
def on_epoch_end(self, epoch, logs=None):
    """After each epoch: push the learned CRF transitions into the decoder,
    score the validation set, and checkpoint when accuracy improves.
    """
    # Copy the trained transition matrix into the Viterbi decoder.
    wordseg.trans = K.eval(CRF.trans)
    print(wordseg.trans)
    acc = evaluate(val_data)
    if acc > self.best_acc:
        self.best_acc = acc
        model.save_weights('./best_model.weights')
    print('acc is: {:.3f}, best acc is :{:.4f}'.format(acc, self.best_acc))
def build(self, input_shape):
    """Create the kernel (and optional bias), then pre-scale both so an
    external learning-rate multiplier restores their effective magnitude.

    Interface is unchanged from the original Keras `Layer.build` contract.
    """
    assert len(input_shape) >= 2
    input_dim = input_shape[-1]
    self._kernel = self.add_weight(
        shape=(input_dim, self.units),
        initializer=self.kernel_initializer,
        name='kernel',
        regularizer=self.kernel_regularizer,
        constraint=self.kernel_constraint,
    )
    if self.use_bias:
        self._bias = self.add_weight(
            shape=(self.units,),
            initializer=self.bias_initializer,
            name='bias',
            regularizer=self.bias_regularizer,
            constraint=self.bias_constraint,
        )
    else:
        # BUG FIX: the original set `self.bias = None`, leaving `self._bias`
        # undefined even though the `_bias` name is used everywhere else.
        self._bias = None
    self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim})
    self.built = True
    if self.lr_multiplier != 1:
        K.set_value(self._kernel, K.eval(self._kernel) / self.lr_multiplier)
        # BUG FIX: only rescale the bias when it exists. The original called
        # K.set_value(self._bias, ...) unconditionally, which fails when
        # use_bias is False (no bias variable to evaluate).
        if self.use_bias:
            K.set_value(self._bias, K.eval(self._bias) / self.lr_multiplier)
def on_epoch_end(self, epoch, logs=None):
    """After each epoch: sync CRF transitions into the recognizer, evaluate
    on the validation set, checkpoint on a new best F1, then report test F1.
    """
    # The CRF is the last sub-layer of the last layer of the wrapped model.
    NER.trans = K.eval(self.model.layers[-1].layers[-1].trans)
    print(NER.trans)
    f1, precision, recall = evaluate(valid_data, self.model)
    # Keep the best-scoring weights.
    if f1 >= self.best_val_f1:
        self.best_val_f1 = f1
        self.model.save_weights(self.model_name)
    print(
        'valid: f1: %.5f, precision: %.5f, recall: %.5f, best f1: %.5f\n' %
        (f1, precision, recall, self.best_val_f1))
    f1, precision, recall = evaluate(test_data, self.model)
    print('test: f1: %.5f, precision: %.5f, recall: %.5f\n' %
          (f1, precision, recall))
def on_epoch_end(self, epoch, logs=None):
    """After each epoch: refresh the recognizer's transition matrix from the
    global CRF layer, evaluate, checkpoint on a new best validation F1, and
    report test-set scores.
    """
    NER.trans = K.eval(CRF.trans)
    print(NER.trans)
    f1, precision, recall = evaluate(valid_data)
    # Keep the best-scoring weights.
    if f1 >= self.best_val_f1:
        self.best_val_f1 = f1
        model.save_weights('./best_model.weights')
    print(
        'valid: f1: %.5f, precision: %.5f, recall: %.5f, best f1: %.5f\n' %
        (f1, precision, recall, self.best_val_f1))
    f1, precision, recall = evaluate(test_data)
    print('test: f1: %.5f, precision: %.5f, recall: %.5f\n' %
          (f1, precision, recall))
# NOTE(review): the fragment below is the tail of a method whose `def` line is
# outside this chunk (it uses `self` and ends in `return`) — indentation here
# is reconstructed on the assumption it is a class method body; confirm
# against the full file.
        mapping = tokenizer.rematch(data, tokens)
        token_ids = tokenizer.tokens_to_ids(tokens)
        segs = [0] * len(token_ids)
        # Model output for the single sequence; decoded into per-token labels.
        pre = model.predict([[token_ids], [segs]])[0]
        labels = self.decode(pre)
        # Group token positions into words: a label < 2 starts a new word,
        # anything else extends the current one. labels[1:-1] skips the
        # [CLS]/[SEP] positions, hence the i + 1 offset back into `mapping`.
        words = []
        for i, label in enumerate(labels[1:-1]):
            if label < 2 or len(words) == 0:
                words.append([i + 1])
            else:
                words[-1].append(i + 1)
        # Map token spans back to character spans of the original text.
        return [data[mapping[w[0]][0]:mapping[w[-1]][-1] + 1] for w in words]


# Viterbi decoder seeded with the trained CRF transition matrix.
wordseg = WordSeg(trans=K.eval(CRF.trans), starts=[0], ends=[0])


def evaluate(data):
    """Simple evaluation: word-level set overlap between predicted and
    gold segmentations, averaged over the dataset.

    `data` is iterated as sequences of gold words; each sample is re-joined
    into raw text, re-segmented, and scored as |pred ∩ gold| / |gold|.
    """
    # 1e-10 avoids division by zero on empty data.
    total, right = 1e-10, 1e-10
    for true in tqdm(data):
        pre = wordseg.segment(''.join(true))
        w_pre = set(pre)
        w_true = set(true)
        total += len(w_true)
        right += len(w_pre & w_true)
    return right / total
# NOTE(review): the fragment below starts mid-loop inside a method whose `def`
# line is outside this chunk — indentation is reconstructed on the assumption
# of a class-method body containing a per-token loop; confirm against the
# full file.
            if label > 0:
                # Labels are packed 3 per entity type (BIO-style): label % 3
                # == 1 marks an entity start; otherwise extend the current
                # entity only if one is open (`starting`).
                if label % 3 == 1:
                    starting = True
                    entities.append([[i], id2label[(label - 1) // 3]])
                elif starting:
                    entities[-1][0].append(i)
                else:
                    starting = False
            else:
                starting = False
        # Map token-index spans back to character spans of the original text.
        return [(text[mapping[w[0]][0]:mapping[w[-1]][-1] + 1], l)
                for w, l in entities]


# Viterbi decoder seeded with the trained CRF transition matrix.
NER = NamedEntityRecognizer(trans=K.eval(CRF.trans), starts=[0], ends=[0])


def evaluate(data, model):
    """Evaluation function: entity-level F1, precision and recall.

    Each sample `d` is a sequence of (token, tag) pairs; predicted entities
    are compared as sets against gold non-'O' entries.
    """
    # X = true positives, Y = predicted count, Z = gold count;
    # 1e-10 avoids division by zero.
    X, Y, Z = 1e-10, 1e-10, 1e-10
    for d in tqdm(data):
        text = ''.join([i[0] for i in d])
        R = set(NER.recognize(text, model))
        T = set([tuple(i) for i in d if i[1] != 'O'])
        X += len(R & T)
        Y += len(R)
        Z += len(T)
    f1, precision, recall = 2 * X / (Y + Z), X / Y, X / Z
    return f1, precision, recall