Example #1
    def validate_one_batch(self, test_batch, task_name, log_writer, epoch):
        S, Q = test_batch
        Q_tag_ids = Q['tag_ids']
        S_tag_ids = S['tag_ids']
        Q_seq_len_list = Q['lens']
        Q_seq_len_list_plus2 = [x + 2 for x in Q_seq_len_list]
        Q_tag_ids_padded = pad_tag_ids(Q_tag_ids)
        S_tag_ids_padded = pad_tag_ids(S_tag_ids)
        Q['tag_ids'] = Q_tag_ids_padded
        S['tag_ids'] = S_tag_ids_padded

        logits = self([S, Q])
        loss = self.crf_loss(logits, Q_tag_ids_padded, Q_seq_len_list_plus2)
        pred_tags, pred_best_score = crf.crf_decode(
            potentials=logits,
            transition_params=self.trans_p,
            sequence_length=Q_seq_len_list_plus2)
        pred_tags_masked = seq_masking(pred_tags, Q_seq_len_list_plus2)
        p_tags_char, _ = get_id2tag_V2(pred_tags_masked,
                                       Q_seq_len_list_plus2,
                                       taskname=task_name)
        t_tags_char, _ = get_id2tag_V2(Q_tag_ids_padded,
                                       Q_seq_len_list_plus2,
                                       taskname=task_name)
        (P, R, F1), _ = evaluate(t_tags_char, p_tags_char, verbose=True)
        write_to_log(loss, P, R, F1, t_tags_char, log_writer, epoch)
        return (loss, pred_tags_masked, Q_tag_ids_padded, P, R, F1)
Example #2
 def __init__(self, batch_size, n_class, ball_num, w_size, embedding_size, words_size, hidden_size, layer_size):
     self._inputs = tf.keras.layers.Input(
         shape=(w_size, ball_num), batch_size=batch_size, name="red_inputs"
     )
     self._tag_indices = tf.keras.layers.Input(
         shape=(ball_num, ), batch_size=batch_size, dtype=tf.int32, name="red_tag_indices"
     )
     self._sequence_length = tf.keras.layers.Input(
         shape=(), batch_size=batch_size, dtype=tf.int32, name="sequence_length"
     )
     # Build the feature extraction
     embedding = tf.keras.layers.Embedding(words_size, embedding_size)(self._inputs)
     first_lstm = tf.convert_to_tensor(
         [tf.keras.layers.LSTM(hidden_size)(embedding[:, :, i, :]) for i in range(ball_num)]
     )
     first_lstm = tf.transpose(first_lstm, perm=[1, 0, 2])
     # Stack layer_size LSTM layers on top of the per-ball outputs
     second_lstm = first_lstm
     for _ in range(layer_size):
         second_lstm = tf.keras.layers.LSTM(hidden_size, return_sequences=True)(second_lstm)
     self._outputs = tf.keras.layers.Dense(n_class)(second_lstm)
     # Build the loss function
     self._log_likelihood, self._transition_params = crf_log_likelihood(
         self._outputs, self._tag_indices, self._sequence_length
     )
     self._loss = tf.reduce_sum(-self._log_likelihood)
     # Build the prediction
     self._pred_sequence, self._viterbi_score = crf_decode(
         self._outputs, self._transition_params, self._sequence_length
     )
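
Example #2 builds its loss from crf_log_likelihood and then decodes with crf_decode using the transition matrix that the likelihood call returns. Below is a minimal, self-contained sketch of the loss side of that pairing, assuming the TensorFlow Addons implementation (tfa.text.crf_log_likelihood); the shapes and random values are illustrative only.

import tensorflow as tf
import tensorflow_addons as tfa

batch_size, max_seq_len, num_tags = 2, 6, 5
logits = tf.random.normal([batch_size, max_seq_len, num_tags])        # unary scores from the model
tag_indices = tf.random.uniform([batch_size, max_seq_len], maxval=num_tags, dtype=tf.int32)
sequence_lengths = tf.constant([6, 4], dtype=tf.int32)                # unpadded lengths

# Returns the per-example log-likelihood and the (created or reused) transition matrix.
log_likelihood, transition_params = tfa.text.crf_log_likelihood(logits, tag_indices, sequence_lengths)
loss = -tf.reduce_mean(log_likelihood)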
Example #3
    def get_viterbi_decoding(self, potentials, sequence_length):
        # decode_tags: A [batch_size, max_seq_len] matrix, with dtype `tf.int32`
        decode_tags, best_score = crf_decode(
            potentials, self.chain_kernel, sequence_length
        )

        return decode_tags, best_score
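
The decode call in Example #3 is the core pattern this whole page revolves around: potentials of shape [batch_size, max_seq_len, num_tags], a [num_tags, num_tags] transition matrix, and the unpadded sequence lengths go in; the Viterbi tag sequence and its score come out. A minimal sketch, assuming tfa.text.crf_decode from TensorFlow Addons, with illustrative shapes:

import tensorflow as tf
import tensorflow_addons as tfa

batch_size, max_seq_len, num_tags = 2, 5, 4
potentials = tf.random.normal([batch_size, max_seq_len, num_tags])    # per-token tag scores
transition_params = tf.random.normal([num_tags, num_tags])            # learned transition scores
sequence_length = tf.constant([5, 3], dtype=tf.int32)                 # true length of each example

# decode_tags: [batch_size, max_seq_len] int32, best_score: [batch_size] float32
decode_tags, best_score = tfa.text.crf_decode(potentials, transition_params, sequence_length)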
Example #4
 def predict_one(self, sentence):
     """
     Run NER on the input sentence and return the result for the first row of the batch.
     :param sentence:
     :return:
     """
     if self.configs.use_bert:
         X, y, att_mask, Sentence = self.dataManager.prepare_single_sentence(sentence)
         if self.configs.finetune:
             model_inputs = (X, att_mask)
         else:
             model_inputs = self.bert_model(X, attention_mask=att_mask)[0]
     else:
         X, y, Sentence = self.dataManager.prepare_single_sentence(sentence)
         model_inputs = X
     inputs_length = tf.math.count_nonzero(X, 1)
     logits, log_likelihood, transition_params = self.ner_model(
             inputs=model_inputs, inputs_length=inputs_length, targets=y)
     label_predicts, _ = crf_decode(logits, transition_params, inputs_length)
     label_predicts = label_predicts.numpy()
     sentence = Sentence[0, 0:inputs_length[0]]
     y_pred = [str(self.dataManager.id2label[val]) for val in label_predicts[0][0:inputs_length[0]]]
     if self.configs.use_bert:
         # Drop the positions corresponding to [CLS] and [SEP]
         y_pred = y_pred[1:-1]
     entities, suffixes, indices = extract_entity(sentence, y_pred, self.dataManager)
     return entities, suffixes, indices
Example #5
 def accuracy(y_true, y_pred):
     shape = tf.shape(y_pred)
     sequence_lengths = tf.ones(shape[0], dtype=tf.int32) * (shape[1])
     viterbi_sequence, _ = crf_decode(y_pred, self.transitions,
                                      sequence_lengths)
     output = tf.keras.backend.one_hot(viterbi_sequence,
                                       self.output_dim)
     return tf.keras.metrics.categorical_accuracy(y_true, output)
Example #6
 def predict_one_batch(self, test_batch):
     seq_ids_padded, tag_ids_padded, seq_len_list = get_train_data_from_batch(test_batch)
     logits = self(seq_ids_padded)
     loss = self.crf_loss(logits, tag_ids_padded, seq_len_list)
     pred_tags, pred_best_score = crf.crf_decode(potentials=logits, transition_params=self.trans_p,
                                                     sequence_length=seq_len_list)
     pred_tags_masked = seq_masking(pred_tags, seq_len_list)
     return (loss, pred_tags_masked, tag_ids_padded)
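
Several examples on this page pass the decoded tags through a helper called seq_masking before scoring them, so that positions beyond each sequence's true length are ignored. Its implementation is not shown here; the following is a hypothetical reconstruction using tf.sequence_mask, for illustration only.

import tensorflow as tf

def seq_masking(pred_tags, seq_len_list):
    # Hypothetical helper: zero out decoded tags past each sequence's true length.
    pred_tags = tf.convert_to_tensor(pred_tags)
    mask = tf.sequence_mask(seq_len_list, maxlen=tf.shape(pred_tags)[1], dtype=pred_tags.dtype)
    return pred_tags * mask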
Example #7
    def get_viterbi_decoding(self, potentials, sequence_length):
        # decode_tags: A [batch_size, max_seq_len] matrix, with dtype `tf.int32`
        decode_tags, best_score = crf_decode(potentials, self.chain_kernel,
                                             sequence_length)

        # covert to one-hot encoding
        decode_tags = tf.keras.backend.one_hot(decode_tags, self.units)

        return decode_tags, best_score
Example #8
    def inner_train_one_step(self, batches, epochNum, task_name, log_writer,
                             log_dir):
        '''
        :param self:
        :param batches: one batch data: [[sentence],[sentence],....]
                               sentence=[[chars],[charids],[tags],[tag_ids]]
        :param inner_epochNum:
        :return:
        '''
        # tf.summary.trace_on(graph=True, profiler=True)  # enable tracing (optional)
        batch_Nums = len(batches)

        losses, P_ts, R_ts, F1_ts = [], [], [], []
        # =====run model=======
        with tqdm(total=batch_Nums) as bar:
            for batch_num in range(batch_Nums):
                batch = batches[batch_num]
                seq_ids_padded, tag_ids_padded, seq_len_list = get_train_data_from_batch(
                    batch)
                with tf.GradientTape() as tape:
                    # print(batch[0])  # for debugging
                    logits = self(seq_ids_padded)
                    loss = self.crf_loss(logits, tag_ids_padded, seq_len_list)
                    pred_tags, pred_best_score = crf.crf_decode(
                        potentials=logits,
                        transition_params=self.trans_p,
                        sequence_length=seq_len_list)
                grads = tape.gradient(loss, self.trainable_variables)
                self.optimizer.apply_gradients(
                    zip(grads, self.trainable_variables))
                # optimizer.minimize(loss, [myModel_bilstm.trainable_variables])

                pred_tags_masked = seq_masking(pred_tags, seq_len_list)
                p_tags_char, p_tagsid_flatten = get_id2tag(pred_tags_masked,
                                                           taskname=task_name)
                t_tags_char, t_tagsid_flatten = get_id2tag(tag_ids_padded,
                                                           taskname=task_name)
                (P_t, R_t, F1_t), _ = evaluate(t_tags_char,
                                               p_tags_char,
                                               verbose=False)
                losses.append(loss)
                P_ts.append(P_t)
                R_ts.append(R_t)
                F1_ts.append(F1_t)
                print('train_loss:{}, train_P:{}'.format(loss, P_t))
                bar.update(1)
        with log_writer.as_default():
            tf.summary.scalar("loss", np.mean(losses), step=epochNum)
            tf.summary.scalar("P", np.mean(P_ts), step=epochNum)
            tf.summary.scalar("R", np.mean(R_ts), step=epochNum)
            tf.summary.scalar("F1", np.mean(F1_ts), step=epochNum)
Example #9
    def call(self, inputs):

        #         if mask is not None:
        #             mask = K.cast(mask, K.floatx())

        seq_target, tag_target, seq_true, tag_true = inputs
        loss = self.multi_loss([seq_true, tag_true], [seq_target, tag_target])
        self.add_loss(loss)
        seq_input_shape = tf.slice(tf.shape(seq_target), [0], [2])
        tag_input_shape = tf.slice(tf.shape(tag_target), [0], [2])
        seq_mask, tag_mask = tf.ones(seq_input_shape), tf.ones(tag_input_shape)
        seq_sequence_lengths = K.sum(K.cast(seq_mask, 'int32'), axis=-1)
        tag_sequence_lengths = K.sum(K.cast(tag_mask, 'int32'), axis=-1)
        #         seq_length=y_pred.shape[1].value
        seq_target, _ = crf_decode(seq_target, self.trans_seq,
                                   seq_sequence_lengths)
        tag_target, _ = crf_decode(tag_target, self.trans_tag,
                                   tag_sequence_lengths)

        return [seq_target, tag_target]
Example #10
 def call(self, inputs, lengths=None):
     """
     parameters:
         inputs [B, L, T]
         lengths [B]
     returns: [B, L]
     """
     # inputs = inputs[:, 1:-1, :]
     shape = tf.shape(inputs)
     if lengths is None:
         lengths = tf.ones((shape[0], ), dtype=tf.int32) * shape[1]
     tags_id, _ = crf_decode(potentials=inputs,
                             transition_params=self.transition_params,
                             sequence_length=lengths)
     return tags_id
Example #11
    def call(self, inputs, sequence_lengths=None, **kwargs):
        sequences = tf.convert_to_tensor(inputs, dtype=self.dtype)
        if sequence_lengths is not None:
            assert len(sequence_lengths.shape) == 2
            assert tf.convert_to_tensor(sequence_lengths).dtype == "int32"
            seq_len_shape = tf.convert_to_tensor(
                sequence_lengths).get_shape().as_list()
            assert seq_len_shape[1] == 1
            self.sequence_lengths = tf.keras.backend.flatten(sequence_lengths)
        else:
            self.sequence_lengths = tf.ones(
                tf.shape(inputs)[0], dtype=tf.int32) * (tf.shape(inputs)[1])

        viterbi_sequence, _ = crf_decode(sequences, self.transitions,
                                         self.sequence_lengths)
        output = tf.keras.backend.one_hot(viterbi_sequence, self.output_dim)
        return tf.keras.backend.in_train_phase(sequences, output)
Example #12
    def inner_train_one_step(self, batches, inner_iters, inner_epochNum,
                             outer_epochNum, task_name, log_writer):
        '''
        :param self:
        :param batches: one batch data: [[sentence],[sentence],....]
                               sentence=[[chars],[charids],[tags],[tag_ids]]
        :param inner_epochNum:
        :return:
        '''

        batches_len = len(batches)

        # =====run model=======
        for batch_num in range(batches_len):
            batch = batches[batch_num]
            seq_ids_padded, tag_ids_padded, seq_len_list = get_train_data_from_batch(
                batch)
            with tf.GradientTape() as tape:
                logits = self(seq_ids_padded)
                loss = self.crf_loss(logits, tag_ids_padded, seq_len_list)
                pred_tags, pred_best_score = crf.crf_decode(
                    potentials=logits,
                    transition_params=self.trans_p,
                    sequence_length=seq_len_list)
            grads = tape.gradient(loss, self.trainable_variables)
            self.optimizer.apply_gradients(zip(grads,
                                               self.trainable_variables))
            # optimizer.minimize(loss, [myModel_bilstm.trainable_variables])

        pred_tags_masked = seq_masking(pred_tags, seq_len_list)
        p_tags_char, p_tagsid_flatten = get_id2tag(pred_tags_masked,
                                                   taskname=task_name)
        t_tags_char, t_tagsid_flatten = get_id2tag(tag_ids_padded,
                                                   taskname=task_name)
        (P_t, R_t, F1_t), _ = evaluate(t_tags_char, p_tags_char, verbose=False)
        with log_writer.as_default():
            step = batch_num + 1 + inner_epochNum * batches_len
            tf.summary.scalar("loss",
                              loss,
                              step=inner_epochNum +
                              outer_epochNum * inner_iters)
            tf.summary.scalar("P", P_t, step=inner_epochNum)
            tf.summary.scalar("R", R_t, step=inner_epochNum)
            tf.summary.scalar("F", F1_t, step=inner_epochNum)
        return (loss, P_t)
Example #13
    def inner_train_one_step(self, batches, inner_epochNum, ckpt_manager, log_writer=None):
        '''
        :param self:
        :param batches: one batch data: [[sentence],[sentence],....]
                               sentence=[[chars],[charids],[tags],[tag_ids]]
        :param inner_epochNum:
        :return:
        '''

        batch_size = len(batches)
        print('======================== batch_size', batch_size)

        # =====run model=======
        with tqdm(total=batch_size) as bar:
            for batch_num in range(batch_size):

                batch = batches[batch_num]
                seq_ids_padded, tag_ids_padded, seq_len_list = get_train_data_from_batch(batch)
                with tf.GradientTape() as tape:
                    logits = self(seq_ids_padded)
                    loss = self.crf_loss(logits, tag_ids_padded, seq_len_list)
                    pred_tags, pred_best_score = crf.crf_decode(potentials=logits, transition_params=self.trans_p,
                                                                sequence_length=seq_len_list)
                grads = tape.gradient(loss, self.trainable_variables)
                self.optimizer.apply_gradients(zip(grads, self.trainable_variables))
                # optimizer.minimize(loss, [myModel_bilstm.trainable_variables])

                bar.update(1)

            pred_tags_masked = seq_masking(pred_tags, seq_len_list)
            p_tags_char, p_tagsid_flatten = get_id2tag(pred_tags_masked)
            t_tags_char, t_tagsid_flatten = get_id2tag(tag_ids_padded)
            try:
                (P_t, R_t, F1_t),_ = evaluate(t_tags_char, p_tags_char, verbose=True)
            except Exception as e:
                print(e)
            with log_writer.as_default():
                step = batch_num + 1 + inner_epochNum * batch_size
                tf.summary.scalar("loss", loss, step=inner_epochNum)
                tf.summary.scalar("P", P_t, step=inner_epochNum)
                tf.summary.scalar("R", R_t, step=inner_epochNum)
                tf.summary.scalar("F", F1_t, step=inner_epochNum)

            ckpt_manager.save(checkpoint_number=inner_epochNum)
Example #14
    def validate_one_batches(self, test_batches, task_name, log_writer, epoch):

        seq_embeddings = test_batches['emb']
        tag_ids = test_batches['tag_ids']
        seq_len_list = test_batches['lens']
        seq_len_list_plus2 = [x + 2 for x in seq_len_list]
        tag_ids_padded = pad_tag_ids(tag_ids)

        logits = self(seq_embeddings)
        loss = self.crf_loss(logits, tag_ids_padded, seq_len_list_plus2)
        pred_tags, pred_best_score = crf.crf_decode(
            potentials=logits,
            transition_params=self.trans_p,
            sequence_length=seq_len_list_plus2)
        pred_tags_masked = seq_masking(pred_tags, seq_len_list_plus2)
        p_tags_char, _ = get_id2tag_V2(pred_tags_masked,
                                       seq_len_list_plus2,
                                       taskname=task_name)
        t_tags_char, _ = get_id2tag_V2(tag_ids_padded,
                                       seq_len_list_plus2,
                                       taskname=task_name)
        (P, R, F1), _ = evaluate(t_tags_char, p_tags_char, verbose=True)
        write_to_log(loss, P, R, F1, t_tags_char, log_writer, epoch)
        return (loss, pred_tags_masked, tag_ids_padded, P, R, F1)
Example #15
def train(configs, data_manager, logger):
    vocab_size = data_manager.max_token_number
    num_classes = data_manager.max_label_number
    learning_rate = configs.learning_rate
    max_to_keep = configs.checkpoints_max_to_keep
    checkpoints_dir = configs.checkpoints_dir
    checkpoint_name = configs.checkpoint_name
    best_f1_val = 0.0
    best_at_epoch = 0
    unprocessed = 0
    very_start_time = time.time()
    epoch = configs.epoch
    batch_size = configs.batch_size

    # Rough optimizer performance in practice: Adagrad > Adam > RMSprop > SGD
    if configs.optimizer == 'Adagrad':
        optimizer = tf.keras.optimizers.Adagrad(learning_rate=learning_rate)
    elif configs.optimizer == 'Adadelta':
        optimizer = tf.keras.optimizers.Adadelta(learning_rate=learning_rate)
    elif configs.optimizer == 'RMSprop':
        optimizer = tf.keras.optimizers.RMSprop(learning_rate=learning_rate)
    elif configs.optimizer == 'SGD':
        optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    else:
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

    tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')
    if configs.use_bert and not configs.finetune:
        bert_model = TFBertModel.from_pretrained('bert-base-chinese')
    else:
        bert_model = None

    train_dataset, val_dataset = data_manager.get_training_set()
    ner_model = NerModel(configs, vocab_size, num_classes)

    checkpoint = tf.train.Checkpoint(ner_model=ner_model)
    checkpoint_manager = tf.train.CheckpointManager(
        checkpoint,
        directory=checkpoints_dir,
        checkpoint_name=checkpoint_name,
        max_to_keep=max_to_keep)
    checkpoint.restore(checkpoint_manager.latest_checkpoint)
    if checkpoint_manager.latest_checkpoint:
        print('Restored from {}'.format(checkpoint_manager.latest_checkpoint))
    else:
        print('Initializing from scratch.')

    num_val_iterations = int(math.ceil(1.0 * len(val_dataset) / batch_size))
    logger.info(('+' * 20) + 'training starting' + ('+' * 20))
    for i in range(epoch):
        start_time = time.time()
        logger.info('epoch:{}/{}'.format(i + 1, epoch))
        for step, batch in tqdm(
                train_dataset.shuffle(
                    len(train_dataset)).batch(batch_size).enumerate()):
            if configs.use_bert:
                X_train_batch, y_train_batch, att_mask_batch = batch
                if configs.finetune:
                    # Fine-tune BERT
                    model_inputs = (X_train_batch, att_mask_batch)
                else:
                    # No fine-tuning; BERT is only used for feature enhancement
                    model_inputs = bert_model(X_train_batch,
                                              attention_mask=att_mask_batch)[0]
            else:
                X_train_batch, y_train_batch = batch
                model_inputs = X_train_batch
            # Compute the sentence lengths before padding was added
            inputs_length = tf.math.count_nonzero(X_train_batch, 1)
            with tf.GradientTape() as tape:
                logits, log_likelihood, transition_params = ner_model(
                    inputs=model_inputs,
                    inputs_length=inputs_length,
                    targets=y_train_batch,
                    training=1)
                loss = -tf.reduce_mean(log_likelihood)
            # Collect the parameters that receive gradients
            variables = ner_model.trainable_variables
            # Exclude the parameters of BERT's pooler layer
            variables = [var for var in variables if 'pooler' not in var.name]
            gradients = tape.gradient(loss, variables)
            # Backpropagation via automatic differentiation
            optimizer.apply_gradients(zip(gradients, variables))
            if step % configs.print_per_batch == 0 and step != 0:
                batch_pred_sequence, _ = crf_decode(logits, transition_params,
                                                    inputs_length)
                measures, _ = metrics(X_train_batch, y_train_batch,
                                      batch_pred_sequence, configs,
                                      data_manager, tokenizer)
                res_str = ''
                for k, v in measures.items():
                    res_str += (k + ': %.3f ' % v)
                logger.info('training batch: %5d, loss: %.5f, %s' %
                            (step, loss, res_str))

        # validation
        logger.info('start evaluate engines...')
        loss_values = []
        val_results = {}
        val_labels_results = {}
        for label in data_manager.suffix:
            val_labels_results.setdefault(label, {})
        for measure in configs.measuring_metrics:
            val_results[measure] = 0
        for label, content in val_labels_results.items():
            for measure in configs.measuring_metrics:
                val_labels_results[label][measure] = 0

        for val_batch in tqdm(val_dataset.batch(batch_size)):
            if configs.use_bert:
                X_val_batch, y_val_batch, att_mask_batch = val_batch
                if configs.finetune:
                    model_inputs = (X_val_batch, att_mask_batch)
                else:
                    model_inputs = bert_model(X_val_batch,
                                              attention_mask=att_mask_batch)[0]
            else:
                X_val_batch, y_val_batch = val_batch
                model_inputs = X_val_batch
            inputs_length_val = tf.math.count_nonzero(X_val_batch, 1)
            logits_val, log_likelihood_val, transition_params_val = ner_model(
                inputs=model_inputs,
                inputs_length=inputs_length_val,
                targets=y_val_batch)
            val_loss = -tf.reduce_mean(log_likelihood_val)
            batch_pred_sequence_val, _ = crf_decode(logits_val,
                                                    transition_params_val,
                                                    inputs_length_val)
            measures, lab_measures = metrics(X_val_batch, y_val_batch,
                                             batch_pred_sequence_val, configs,
                                             data_manager, tokenizer)

            for k, v in measures.items():
                val_results[k] += v
            for lab in lab_measures:
                for k, v in lab_measures[lab].items():
                    val_labels_results[lab][k] += v
            loss_values.append(val_loss)

        time_span = (time.time() - start_time) / 60
        val_res_str = ''
        val_f1_avg = 0
        for k, v in val_results.items():
            val_results[k] /= num_val_iterations
            val_res_str += (k + ': %.3f ' % val_results[k])
            if k == 'f1':
                val_f1_avg = val_results[k]
        for label, content in val_labels_results.items():
            val_label_str = ''
            for k, v in content.items():
                val_labels_results[label][k] /= num_val_iterations
                val_label_str += (k + ': %.3f ' % val_labels_results[label][k])
            logger.info('label: %s, %s' % (label, val_label_str))
        logger.info('time consumption:%.2f(min), %s' %
                    (time_span, val_res_str))

        if np.array(val_f1_avg).mean() > best_f1_val:
            unprocessed = 0
            best_f1_val = np.array(val_f1_avg).mean()
            best_at_epoch = i + 1
            checkpoint_manager.save()
            logger.info('saved the new best model with f1: %.3f' % best_f1_val)
        else:
            unprocessed += 1

        if configs.is_early_stop:
            if unprocessed >= configs.patient:
                logger.info(
                    'early stopped, no progress obtained within {} epochs'.
                    format(configs.patient))
                logger.info('overall best f1 is {} at {} epoch'.format(
                    best_f1_val, best_at_epoch))
                logger.info('total training time consumption: %.3f(min)' %
                            ((time.time() - very_start_time) / 60))
                return
    logger.info('overall best f1 is {} at {} epoch'.format(
        best_f1_val, best_at_epoch))
    logger.info('total training time consumption: %.3f(min)' %
                ((time.time() - very_start_time) / 60))
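
The training loops on this page recover the unpadded sequence lengths with tf.math.count_nonzero over axis 1, which works because the padding token id is 0. A minimal check of that idiom (the token ids below are made up):

import tensorflow as tf

X_batch = tf.constant([[101, 2769, 102, 0, 0],
                       [101, 2769, 4638, 102, 0]])
inputs_length = tf.math.count_nonzero(X_batch, 1)
print(inputs_length.numpy())  # [3 4]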
Example #16
def train(configs, data_manager, logger):
    domain_classes = data_manager.domain_class_number
    intent_classes = data_manager.intent_class_number
    slot_classes = data_manager.slot_class_number
    id2slot = data_manager.id2slot
    learning_rate = configs.learning_rate
    epoch = configs.epoch
    batch_size = configs.batch_size

    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    bert_model = TFBertModel.from_pretrained('bert-base-chinese')
    tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')
    X_train, att_mask_train, domain_train, intent_train, slot_train, \
    X_val, att_mask_val, domain_val, intent_val, slot_val = data_manager.get_training_set()

    bilstm_crf_model = BiLSTM_CRFModel(configs, slot_classes)
    domain_model = DomainClassificationModel(configs, domain_classes)
    intent_model = IntentClassificationModel(configs, intent_classes)

    num_iterations = int(math.ceil(1.0 * len(X_train) / batch_size))
    num_val_iterations = int(math.ceil(1.0 * len(X_val) / batch_size))
    logger.info(('+' * 20) + 'training starting' + ('+' * 20))

    for i in range(epoch):
        start_time = time.time()
        logger.info('epoch:{}/{}'.format(i + 1, epoch))
        for iteration in tqdm(range(num_iterations)):
            X_train_batch, att_mask_train_batch, domain_train_batch, intent_train_batch, slot_train_batch \
                = data_manager.next_batch(X_train, att_mask_train, domain_train, intent_train, slot_train,
                                          start_index=iteration * batch_size)
            inputs_length = tf.math.count_nonzero(X_train_batch, 1)
            # Get the BERT model output
            bert_model_inputs = bert_model(X_train_batch, attention_mask=att_mask_train_batch)[0]
            with tf.GradientTape() as tape:
                # Slot model inputs
                slot_logits, slot_log_likelihood, slot_transition_params = bilstm_crf_model.call(
                    inputs=bert_model_inputs, inputs_length=inputs_length, targets=slot_train_batch, training=1)
                slot_loss = -tf.reduce_mean(slot_log_likelihood)
                # Domain model inputs
                domain_logits = domain_model.call(inputs=bert_model_inputs[:, 0, :], training=1)
                domain_loss_vec = tf.keras.losses.sparse_categorical_crossentropy(y_pred=domain_logits,
                                                                                  y_true=domain_train_batch)
                domain_loss = tf.reduce_mean(domain_loss_vec)
                # Intent model inputs
                intent_logits = intent_model.call(inputs=bert_model_inputs[:, 0, :], training=1)
                intent_loss_vec = tf.keras.losses.sparse_categorical_crossentropy(y_pred=intent_logits,
                                                                                  y_true=intent_train_batch)
                intent_loss = tf.reduce_mean(intent_loss_vec)
                total_loss = domain_loss + intent_loss + 2 * slot_loss
            # Parameter list
            trainable_variables = bilstm_crf_model.trainable_variables + domain_model.trainable_variables + intent_model.trainable_variables
            # Collect the parameters that receive gradients
            gradients = tape.gradient(total_loss, trainable_variables)
            # Backpropagation via automatic differentiation
            optimizer.apply_gradients(zip(gradients, trainable_variables))

            if iteration % configs.print_per_batch == 0 and iteration != 0:
                domain_predictions = tf.argmax(domain_logits, axis=-1)
                intent_predictions = tf.argmax(intent_logits, axis=-1)
                domain_measures = cal_metrics(y_true=domain_train_batch, y_pred=domain_predictions)
                intent_measures = cal_metrics(y_true=intent_train_batch, y_pred=intent_predictions)
                batch_pred_sequence, _ = crf_decode(slot_logits, slot_transition_params, inputs_length)
                slot_measures = cal_slots_metrics(X_train_batch, slot_train_batch, batch_pred_sequence, id2slot, tokenizer)
                domain_str = ''
                for k, v in domain_measures.items():
                    domain_str += (k + ': %.3f ' % v)
                logger.info('training batch: {}'.format(iteration))
                logger.info('domain_loss: %.5f, %s' % (domain_loss, domain_str))
                intent_str = ''
                for k, v in intent_measures.items():
                    intent_str += (k + ': %.3f ' % v)
                logger.info('intent_loss: %.5f, %s' % (intent_loss, intent_str))
                slot_str = ''
                for k, v in slot_measures.items():
                    slot_str += (k + ': %.3f ' % v)
                logger.info('slot_loss: %.5f, %s' % (slot_loss, slot_str))
        # validation
        logger.info('start evaluate engines...')
        slot_val_results = {'precision': 0, 'recall': 0, 'f1': 0}
        domain_val_results = {'precision': 0, 'recall': 0, 'f1': 0}
        intent_val_results = {'precision': 0, 'recall': 0, 'f1': 0}
        for iteration in tqdm(range(num_val_iterations)):
            X_val_batch, att_mask_val_batch, domain_val_batch, intent_val_batch, slot_val_batch \
                = data_manager.next_batch(X_val, att_mask_val, domain_val, intent_val, slot_val,
                                          start_index=iteration * batch_size)
            inputs_length = tf.math.count_nonzero(X_val_batch, 1)
            # Get the BERT model output
            bert_model_inputs = bert_model(X_val_batch, attention_mask=att_mask_val_batch)[0]
            # Slot model prediction
            slot_logits, slot_log_likelihood, slot_transition_params = bilstm_crf_model.call(
                inputs=bert_model_inputs, inputs_length=inputs_length, targets=slot_val_batch)
            batch_pred_sequence, _ = crf_decode(slot_logits, slot_transition_params, inputs_length)
            slot_measures = cal_slots_metrics(X_val_batch, slot_val_batch, batch_pred_sequence, id2slot, tokenizer)
            # Domain model prediction
            domain_logits = domain_model.call(inputs=bert_model_inputs[:, 0, :])
            domain_predictions = tf.argmax(domain_logits, axis=-1)
            domain_measures = cal_metrics(y_true=domain_val_batch, y_pred=domain_predictions)
            # Intent model prediction
            intent_logits = intent_model.call(inputs=bert_model_inputs[:, 0, :])
            intent_predictions = tf.argmax(intent_logits, axis=-1)
            intent_measures = cal_metrics(y_true=intent_val_batch, y_pred=intent_predictions)

            for k, v in slot_measures.items():
                slot_val_results[k] += v
            for k, v in domain_measures.items():
                domain_val_results[k] += v
            for k, v in intent_measures.items():
                intent_val_results[k] += v

        time_span = (time.time() - start_time) / 60
        val_slot_str = ''
        val_domain_str = ''
        val_intent_str = ''
        for k, v in slot_val_results.items():
            slot_val_results[k] /= num_val_iterations
            val_slot_str += (k + ': %.3f ' % slot_val_results[k])
        for k, v in domain_val_results.items():
            domain_val_results[k] /= num_val_iterations
            val_domain_str += (k + ': %.3f ' % domain_val_results[k])
        for k, v in intent_val_results.items():
            intent_val_results[k] /= num_val_iterations
            val_intent_str += (k + ': %.3f ' % intent_val_results[k])
        logger.info('slot: {}'.format(val_slot_str))
        logger.info('domain: {}'.format(val_domain_str))
        logger.info('intent: {}'.format(val_intent_str))
        logger.info('time consumption:%.2f(min)' % time_span)
Example #17
    def inner_train_one_step(self,
                             batches,
                             inner_iters,
                             inner_epochNum,
                             outer_epochNum,
                             task_name,
                             log_writer,
                             mod='pretrain'):
        '''
        :param self:
        :param batches: one batch data: [[sentence],[sentence],....]
                               sentence=[emb:[],chars:[],tags:[],tag_ids:[]]
        :param inner_epochNum:
        :return:
        '''

        batches_len = len(batches)

        # =====run model=======
        for batch_num in range(batches_len):
            batch = batches[batch_num]
            seq_embeddings = batch['emb']
            tag_ids = batch['tag_ids']
            seq_len_list = batch['lens']
            seq_len_list_plus2 = [x + 2 for x in seq_len_list]
            tag_ids_padded = pad_tag_ids(tag_ids)

            with tf.GradientTape(persistent=True) as tape:
                logits = self(seq_embeddings)
                loss = self.crf_loss(logits, tag_ids_padded,
                                     seq_len_list_plus2)
                pred_tags, pred_best_score = crf.crf_decode(
                    potentials=logits,
                    transition_params=self.trans_p,
                    sequence_length=seq_len_list_plus2)
            # trans_p is a single variable, so wrap it in a list for tape.gradient/apply_gradients
            grads = tape.gradient(loss, [self.trans_p])
            self.optimizer.apply_gradients(zip(grads, [self.trans_p]))
            grads = tape.gradient(loss, self.dense.trainable_variables)
            self.optimizer.apply_gradients(
                zip(grads, self.dense.trainable_variables))
            if mod == 'pretrain':
                grads = tape.gradient(loss, self.BiLSTM.trainable_variables)
                self.optimizer.apply_gradients(
                    zip(grads, self.BiLSTM.trainable_variables))
            del tape
            # optimizer.minimize(loss, [myModel_bilstm.trainable_variables])

        pred_tags_masked = seq_masking(pred_tags, seq_len_list_plus2)
        p_tags_char, p_tagsid_flatten = get_id2tag_V2(pred_tags_masked,
                                                      seq_len_list_plus2,
                                                      taskname=task_name)
        t_tags_char, t_tagsid_flatten = get_id2tag_V2(tag_ids_padded,
                                                      seq_len_list_plus2,
                                                      taskname=task_name)
        (P_t, R_t, F1_t), _ = evaluate(t_tags_char, p_tags_char, verbose=False)
        with log_writer.as_default():
            # step = batch_num + 1 + inner_epochNum * batches_len
            tf.summary.scalar("loss",
                              loss,
                              step=inner_epochNum +
                              outer_epochNum * inner_iters)
            tf.summary.scalar("P",
                              P_t,
                              step=inner_epochNum +
                              outer_epochNum * inner_iters)
            tf.summary.scalar("R",
                              R_t,
                              step=inner_epochNum +
                              outer_epochNum * inner_iters)
            tf.summary.scalar("F",
                              F1_t,
                              step=inner_epochNum +
                              outer_epochNum * inner_iters)
        return (loss, P_t, R_t, F1_t)
Example #18
def train(configs, data_manager, logger):
    vocab_size = data_manager.max_token_number
    num_classes = data_manager.max_label_number
    learning_rate = configs.learning_rate
    max_to_keep = configs.checkpoints_max_to_keep
    checkpoints_dir = configs.checkpoints_dir
    checkpoint_name = configs.checkpoint_name
    best_f1_val = 0.0
    best_at_epoch = 0
    unprocessed = 0
    very_start_time = time.time()
    epoch = configs.epoch
    batch_size = configs.batch_size

    # Rough optimizer performance in practice: Adagrad > Adam > RMSprop > SGD
    if configs.optimizer == 'Adagrad':
        optimizer = tf.keras.optimizers.Adagrad(learning_rate=learning_rate)
    elif configs.optimizer == 'Adadelta':
        optimizer = tf.keras.optimizers.Adadelta(learning_rate=learning_rate)
    elif configs.optimizer == 'RMSprop':
        optimizer = tf.keras.optimizers.RMSprop(learning_rate=learning_rate)
    elif configs.optimizer == 'SGD':
        optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    else:
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

    if configs.use_bert:
        bert_model = TFBertModel.from_pretrained('bert-base-chinese')
        tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')
        X_train, y_train, att_mask_train, X_val, y_val, att_mask_val = data_manager.get_training_set(
        )
    else:
        X_train, y_train, X_val, y_val = data_manager.get_training_set()
        att_mask_train, att_mask_val = np.array([]), np.array([])
        bert_model, tokenizer = None, None

    bilstm_crf_model = BiLSTM_CRFModel(configs, vocab_size, num_classes,
                                       configs.use_bert)
    checkpoint = tf.train.Checkpoint(model=bilstm_crf_model)
    checkpoint_manager = tf.train.CheckpointManager(
        checkpoint,
        directory=checkpoints_dir,
        checkpoint_name=checkpoint_name,
        max_to_keep=max_to_keep)

    num_iterations = int(math.ceil(1.0 * len(X_train) / batch_size))
    num_val_iterations = int(math.ceil(1.0 * len(X_val) / batch_size))
    logger.info(('+' * 20) + 'training starting' + ('+' * 20))
    for i in range(epoch):
        start_time = time.time()
        # shuffle train at each epoch
        sh_index = np.arange(len(X_train))
        np.random.shuffle(sh_index)
        X_train = X_train[sh_index]
        y_train = y_train[sh_index]
        if configs.use_bert:
            att_mask_train = att_mask_train[sh_index]
        logger.info('epoch:{}/{}'.format(i + 1, epoch))
        for iteration in tqdm(range(num_iterations)):
            if configs.use_bert:
                X_train_batch, y_train_batch, att_mask_batch = data_manager.next_batch(
                    X_train,
                    y_train,
                    att_mask_train,
                    start_index=iteration * batch_size)
                # Compute the sentence lengths before padding was added
                inputs_length = tf.math.count_nonzero(X_train_batch, 1)
                # Get the BERT model output
                model_inputs = bert_model(X_train_batch,
                                          attention_mask=att_mask_batch)[0]
            else:
                X_train_batch, y_train_batch = data_manager.next_batch(
                    X_train, y_train, start_index=iteration * batch_size)
                # Compute the sentence lengths before padding was added
                inputs_length = tf.math.count_nonzero(X_train_batch, 1)
                model_inputs = X_train_batch
            with tf.GradientTape() as tape:
                logits, log_likelihood, transition_params = bilstm_crf_model.call(
                    inputs=model_inputs,
                    inputs_length=inputs_length,
                    targets=y_train_batch,
                    training=1)
                loss = -tf.reduce_mean(log_likelihood)
            # Collect the parameters that receive gradients
            gradients = tape.gradient(loss,
                                      bilstm_crf_model.trainable_variables)
            # Backpropagation via automatic differentiation
            optimizer.apply_gradients(
                zip(gradients, bilstm_crf_model.trainable_variables))
            if iteration % configs.print_per_batch == 0 and iteration != 0:
                batch_pred_sequence, _ = crf_decode(logits, transition_params,
                                                    inputs_length)
                measures, _ = metrics(X_train_batch, y_train_batch,
                                      batch_pred_sequence, configs,
                                      data_manager, tokenizer)
                res_str = ''
                for k, v in measures.items():
                    res_str += (k + ': %.3f ' % v)
                logger.info('training batch: %5d, loss: %.5f, %s' %
                            (iteration, loss, res_str))

        # validation
        logger.info('start evaluate engines...')
        loss_values = []
        val_results = {}
        val_labels_results = {}
        for label in data_manager.suffix:
            val_labels_results.setdefault(label, {})
        for measure in configs.measuring_metrics:
            val_results[measure] = 0
        for label, content in val_labels_results.items():
            for measure in configs.measuring_metrics:
                val_labels_results[label][measure] = 0

        for iteration in tqdm(range(num_val_iterations)):
            if configs.use_bert:
                X_val_batch, y_val_batch, att_mask_batch = data_manager.next_batch(
                    X_val, y_val, att_mask_val, iteration * batch_size)
                inputs_length_val = tf.math.count_nonzero(X_val_batch, 1)
                # Get the BERT model output
                model_inputs = bert_model(X_val_batch,
                                          attention_mask=att_mask_batch)[0]
            else:
                X_val_batch, y_val_batch = data_manager.next_batch(
                    X_val, y_val, iteration * batch_size)
                inputs_length_val = tf.math.count_nonzero(X_val_batch, 1)
                model_inputs = X_val_batch
            logits_val, log_likelihood_val, transition_params_val = bilstm_crf_model.call(
                inputs=model_inputs,
                inputs_length=inputs_length_val,
                targets=y_val_batch)
            val_loss = -tf.reduce_mean(log_likelihood_val)
            batch_pred_sequence_val, _ = crf_decode(logits_val,
                                                    transition_params_val,
                                                    inputs_length_val)
            measures, lab_measures = metrics(X_val_batch, y_val_batch,
                                             batch_pred_sequence_val, configs,
                                             data_manager, tokenizer)

            for k, v in measures.items():
                val_results[k] += v
            for lab in lab_measures:
                for k, v in lab_measures[lab].items():
                    val_labels_results[lab][k] += v
            loss_values.append(val_loss)

        time_span = (time.time() - start_time) / 60
        val_res_str = ''
        dev_f1_avg = 0
        for k, v in val_results.items():
            val_results[k] /= num_val_iterations
            val_res_str += (k + ': %.3f ' % val_results[k])
            if k == 'f1':
                dev_f1_avg = val_results[k]
        for label, content in val_labels_results.items():
            val_label_str = ''
            for k, v in content.items():
                val_labels_results[label][k] /= num_val_iterations
                val_label_str += (k + ': %.3f ' % val_labels_results[label][k])
            logger.info('label: %s, %s' % (label, val_label_str))
        logger.info('time consumption:%.2f(min), %s' %
                    (time_span, val_res_str))

        if np.array(dev_f1_avg).mean() > best_f1_val:
            unprocessed = 0
            best_f1_val = np.array(dev_f1_avg).mean()
            best_at_epoch = i + 1
            checkpoint_manager.save()
            logger.info('saved the new best model with f1: %.3f' % best_f1_val)
        else:
            unprocessed += 1

        if configs.is_early_stop:
            if unprocessed >= configs.patient:
                logger.info(
                    'early stopped, no progress obtained within {} epochs'.
                    format(configs.patient))
                logger.info('overall best f1 is {} at {} epoch'.format(
                    best_f1_val, best_at_epoch))
                logger.info('total training time consumption: %.3f(min)' %
                            ((time.time() - very_start_time) / 60))
                return
    logger.info('overall best f1 is {} at {} epoch'.format(
        best_f1_val, best_at_epoch))
    logger.info('total training time consumption: %.3f(min)' %
                ((time.time() - very_start_time) / 60))
Example #19
    def inner_train_one_step(self, batch, inner_iters, inner_epochNum,
                             outer_epochNum, task_name, log_writer):
        '''
        :param self:
        :param batch: batches = [S,Q]
                        S,Q: [[sentence],[sentence],....]
                        sentence=[emb:[],chars:[],tags:[],tag_ids:[]]
        :param inner_epochNum:
        :return:
        '''

        batches_len = len(batch)

        # =====run model=======

        S, Q = batch
        Q_tag_ids = Q['tag_ids']
        S_tag_ids = S['tag_ids']
        Q_seq_len_list = Q['lens']
        Q_seq_len_list_plus2 = [x + 2 for x in Q_seq_len_list]
        Q_tag_ids_padded = pad_tag_ids(Q_tag_ids)
        S_tag_ids_padded = pad_tag_ids(S_tag_ids)
        Q['tag_ids'] = Q_tag_ids_padded
        S['tag_ids'] = S_tag_ids_padded

        with tf.GradientTape() as tape:
            logits = self([S, Q])
            loss = self.crf_loss(logits, Q_tag_ids_padded,
                                 Q_seq_len_list_plus2)
            pred_tags, pred_best_score = crf.crf_decode(
                potentials=logits,
                transition_params=self.trans_p,
                sequence_length=Q_seq_len_list_plus2)
        grads = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.trainable_variables))
        # optimizer.minimize(loss, [myModel_bilstm.trainable_variables])

        pred_tags_masked = seq_masking(pred_tags, Q_seq_len_list_plus2)
        p_tags_char, p_tagsid_flatten = get_id2tag_V2(pred_tags_masked,
                                                      Q_seq_len_list_plus2,
                                                      taskname=task_name)
        t_tags_char, t_tagsid_flatten = get_id2tag_V2(Q_tag_ids_padded,
                                                      Q_seq_len_list_plus2,
                                                      taskname=task_name)
        (P_t, R_t, F1_t), _ = evaluate(t_tags_char, p_tags_char, verbose=False)
        with log_writer.as_default():
            # step = batch_num + 1 + inner_epochNum * batches_len
            tf.summary.scalar("loss",
                              loss,
                              step=inner_epochNum +
                              outer_epochNum * inner_iters)
            tf.summary.scalar("P",
                              P_t,
                              step=inner_epochNum +
                              outer_epochNum * inner_iters)
            tf.summary.scalar("R",
                              R_t,
                              step=inner_epochNum +
                              outer_epochNum * inner_iters)
            tf.summary.scalar("F",
                              F1_t,
                              step=inner_epochNum +
                              outer_epochNum * inner_iters)
        return (loss, P_t, R_t, F1_t)
Example #20
def train_one_epoch(mymodel, optimizer, batches, epoch_num=1, checkpoints_dir='checkpoints00', ckpt_manager=None, log_writer=None):
    '''
    :param mymodel:
    :param batches: one batch data: [[sentence],[sentence],....]
                           sentence=[[chars],[charids],[tags],[tag_ids]]
    :param epoch_num:
    :return:
    '''


    if ckpt_manager is None:
        checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=mymodel)
        ckpt_manager = tf.train.CheckpointManager(checkpoint, directory=checkpoints_dir, max_to_keep=10)

    batch_size = len(batches)
    print('======================== batch_size', batch_size)

    # =====run model=======
    with tqdm(total=batch_size) as bar:
        for batch_num in range(batch_size):

            batch = batches[batch_num]
            seq_ids_padded, tag_ids_padded, seq_len_list = get_train_data_from_batch(batch)
            with tf.GradientTape() as tape:
                logits = mymodel(seq_ids_padded)
                loss = mymodel.crf_loss(logits, tag_ids_padded, seq_len_list)
                pred_tags, pred_best_score = crf.crf_decode(potentials=logits, transition_params=mymodel.trans_p,
                                                            sequence_length=seq_len_list)
            grads = tape.gradient(loss, mymodel.trainable_variables)
            optimizer.apply_gradients(zip(grads, mymodel.trainable_variables))
            # optimizer.minimize(loss, [myModel_bilstm.trainable_variables])

            bar.update(1)

            if batch_num % 2 == 0:
                pred_tags_masked = seq_masking(pred_tags, seq_len_list)
                p_tags = findall_tag(pred_tags_masked, seq_len_list)
                t_tags = findall_tag(tag_ids_padded, seq_len_list)
                (P_train, R_train, F1_train) = P_R_F1_score(p_tags, t_tags)
                p_tags_char, p_tagsid_flatten = get_id2tag(pred_tags_masked)
                t_tags_char, t_tagsid_flatten = get_id2tag(tag_ids_padded)
                try:
                    P_C, R_C, F1_C = evaluate(t_tags_char,p_tags_char,verbose=True)
                except Exception as e:
                    print(e)

                step = batch_num + 1 + epoch_num * batch_size
                tf.summary.scalar("train_loss", loss, step=step)
                tf.summary.scalar("P_train", P_train, step=step)
                tf.summary.scalar("R_train", R_train, step=step)
                tf.summary.scalar("F1_train", F1_train, step=step)
                tf.summary.scalar("P_C", P_C, step=step)
                tf.summary.scalar("R_C", R_C, step=step)
                tf.summary.scalar("F1_C", F1_C, step=step)

                print(
                    'epoch:{}\t\tbatch:{}\t\ttrain_loss:{:.2f}\t\ttrain_P :{:.8f}\t\ttrain_R :{:.8f}\t\ttrain_F1 :{:.8f}\t\t'.format(
                        epoch_num,
                        batch_num,
                        loss,
                        P_train, R_train, F1_train))
        ckpt_manager.save(checkpoint_number=epoch_num)