Example #1
    def test(self):
        # turn on the testing mode; clean up the history
        network = self._model
        self._mode(network, is_test=True)
        data_iterator = Batch(self.data, self.batch_size, sampler=SequentialSampler(), as_numpy=False)
        eval_results = {}
        try:
            with torch.no_grad():
                for batch_x, batch_y in data_iterator:
                    _move_dict_value_to_device(batch_x, batch_y, device=self._model_device)
                    pred_dict = self._data_forward(self._predict_func, batch_x)
                    if not isinstance(pred_dict, dict):
                        raise TypeError(f"The return value of {get_func_signature(self._predict_func)} " 
                                                         f"must be `dict`, got {type(pred_dict)}.")
                    for metric in self.metrics:
                        metric(pred_dict, batch_y)
                for metric in self.metrics:
                    eval_result = metric.get_metric()
                    if not isinstance(eval_result, dict):
                        raise TypeError(f"The return value of {get_func_signature(metric.get_metric)} must be "
                                        f"`dict`, got {type(eval_result)}")
                    metric_name = metric.__class__.__name__
                    eval_results[metric_name] = eval_result
        except CheckError as e:
            prev_func_signature = get_func_signature(self._predict_func)
            _check_loss_evaluate(prev_func_signature=prev_func_signature, func_signature=e.func_signature,
                                 check_res=e.check_res, pred_dict=pred_dict, target_dict=batch_y,
                                 dataset=self.data, check_level=0)

        if self.verbose >= 1:
            print("[tester] \n{}".format(self._format_eval_results(eval_results)))
        self._mode(network, is_test=False)
        return eval_results
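
Every snippet on this page revolves around fastNLP's _move_dict_value_to_device helper. As a rough mental model only (this is a hypothetical stand-in, not the library's actual code), it walks the given batch dicts and moves every tensor value onto the target device in place, leaving non-tensor values untouched:

import torch

def move_dict_values_to_device(*dicts, device):
    # Hypothetical stand-in for _move_dict_value_to_device: move every
    # torch.Tensor value of each dict onto `device`, in place.
    for d in dicts:
        for key, value in d.items():
            if isinstance(value, torch.Tensor):
                d[key] = value.to(device)

batch_x = {"words": torch.zeros(2, 5, dtype=torch.long), "seq_len": torch.tensor([5, 3])}
batch_y = {"target": torch.tensor([1, 0])}
move_dict_values_to_device(batch_x, batch_y, device=torch.device("cpu"))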
Example #2
def get_predictions(pred_model, input_data, batch_size, num_workers=4):
    texts = list(list(map(lambda x: vocabs['char'].to_word(x), sample['chars'])) for sample in input_data)
    seq_lens = [sample['seq_len'] for sample in input_data]
    pred_model.to(device)
    sampler = SequentialSampler()
    data_iterator = DataSetIter(dataset=input_data, batch_size=batch_size, sampler=sampler,
                                num_workers=num_workers)
    with torch.no_grad():
        preds, golds = [], []
        pred_model.eval()

        for batch_x, batch_y in data_iterator:
            _move_dict_value_to_device(batch_x, batch_y, device=device)
            x = _build_args(pred_model.forward, **batch_x)
            # the outer torch.no_grad() already covers this forward pass
            y = pred_model.forward(**x)
            preds.extend(list(map(list, y['pred'].cpu().numpy())))
            golds.extend(list(map(list, batch_y['target'].cpu().numpy())))
    pred_seqs = list(list(map(lambda _y: vocabs['label'].to_word(_y), pred)) for pred in preds)
    gold_seqs = list(list(map(lambda _y: vocabs['label'].to_word(_y), gold)) for gold in golds)
    case_result = []
    for pred_seq, gold_seq, word_seq, seq_len in zip(pred_seqs, gold_seqs, texts, seq_lens):
        pred_seq = pred_seq[:seq_len]
        gold_seq = gold_seq[:seq_len]
        case_result.append((''.join(word_seq), extract_kvpairs_in_bmoes(gold_seq, word_seq),
                           extract_kvpairs_in_bmoes(pred_seq, word_seq)))

    # output for case study
    os.makedirs(f'../output/case_study/{args.dataset}', exist_ok=True)
    out_path = (f'../output/case_study/{args.dataset}/{args.dataset}_bert{args.use_bert}'
                f'_scheme{args.new_tag_scheme}_ple{args.ple_channel_num}'
                f'_plstm{int(args.use_ple_lstm)}_trainrate{args.train_dataset_rate}.casestudy')
    # use a context manager so the output file is closed after writing
    with open(out_path, 'w', encoding='utf8') as fout:
        for word_seq, gold_pair, pred_pair in case_result:
            fout.write(word_seq + '\n' + str(gold_pair) + '\n' + str(pred_pair) + '\n\n')
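
The post-processing in get_predictions maps predicted label indices back to strings and truncates each sequence to its real length. The same step in isolation, with a made-up id-to-label mapping standing in for vocabs['label'].to_word:

# hypothetical label vocabulary in place of vocabs['label']
id2label = {0: "O", 1: "B-PER", 2: "E-PER"}

pred_ids, seq_len = [1, 2, 0, 0, 0], 3                 # padded prediction and true length
pred_seq = [id2label[i] for i in pred_ids][:seq_len]
print(pred_seq)                                        # ['B-PER', 'E-PER', 'O']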
Example #3
def is_phrase_match_BERT(phrase1, phrase2):
    """
    Determine whether two phrases match.
    :param phrase1: the first phrase
    :param phrase2: the second phrase
    :return: one of "Not Match", "Related", or "Match"
    """
    from fastNLP import DataSetIter, DataSet
    from fastNLP.core.utils import _move_dict_value_to_device
    from my_bert_match import addWords, addWordPiece, processItem, processNum, addSeqlen
    # labels: 0 = not match, 1 = related, 2 = match
    testset = DataSet({"raw_words": [f"{phrase1}::{phrase2}"]})
    testset.apply(addWords, new_field_name="p_words")
    testset.apply(addWordPiece, new_field_name="t_words")
    testset.apply(processItem, new_field_name="word_pieces")
    testset.apply(processNum, new_field_name="word_nums")
    testset.apply(addSeqlen, new_field_name="seq_len")
    testset.field_arrays["word_pieces"].is_input = True
    testset.field_arrays["seq_len"].is_input = True
    testset.field_arrays["word_nums"].is_input = True
    # print(testset)
    with torch.no_grad():
        bert_model.eval()
        test_batch = DataSetIter(batch_size=1, dataset=testset, sampler=None)
        outputs = []
        for batch_x, batch_y in test_batch:
            _move_dict_value_to_device(batch_x, batch_y, device=device)
            outputs.append(bert_model.forward(batch_x["word_pieces"], batch_x["word_nums"], batch_x["seq_len"])['pred'])
        outputs = torch.cat(outputs)
        outputs = torch.nn.functional.softmax(outputs, dim=1)
        return ["Not Match", "Related", "Match"][outputs.argmax().item()]
Example #4
def test():
    from fastNLP import DataSetIter, DataSet
    from fastNLP.core.utils import _move_dict_value_to_device, _get_model_device
    # labels: 0 = not match, 1 = related, 2 = match
    testset = DataSet({"raw_words": ["5::five"]})
    testset.apply(addWords, new_field_name="p_words")
    testset.apply(addWordPiece, new_field_name="t_words")
    testset.apply(processItem, new_field_name="word_pieces")
    testset.apply(processNum, new_field_name="word_nums")
    testset.apply(addSeqlen, new_field_name="seq_len")
    testset.field_arrays["word_pieces"].is_input = True
    testset.field_arrays["seq_len"].is_input = True
    testset.field_arrays["word_nums"].is_input = True
    # print(testset)
    from fastNLP.io import ModelLoader
    loader = ModelLoader()
    if torch.cuda.is_available():
        model = loader.load_pytorch_model(
            "../models/bert_model_max_triple.pkl")
    else:
        model = torch.load("../models/bert_model_max_triple.pkl",
                           map_location="cpu")

    model.eval()
    test_batch = DataSetIter(batch_size=1, dataset=testset, sampler=None)
    outputs = []
    for batch_x, batch_y in test_batch:
        _move_dict_value_to_device(batch_x,
                                   batch_y,
                                   device=_get_model_device(model))
        outputs.append(
            model.forward(batch_x["word_pieces"], batch_x["word_nums"],
                          batch_x["seq_len"])['pred'])
    outputs = torch.cat(outputs)
    outputs = torch.nn.functional.softmax(outputs, dim=1)
    return outputs
Example #5
 def _tqdm_train(self):
     self.step = 0
     data_iterator = Batch(self.train_data,
                           batch_size=self.batch_size,
                           sampler=self.sampler,
                           as_numpy=False)
     total_steps = data_iterator.num_batches * self.n_epochs
     with tqdm(total=total_steps,
               postfix='loss:{0:<6.5f}',
               leave=False,
               dynamic_ncols=True) as pbar:
         avg_loss = 0
         for epoch in range(1, self.n_epochs + 1):
             pbar.set_description_str(
                 desc="Epoch {}/{}".format(epoch, self.n_epochs))
             for batch_x, batch_y in data_iterator:
                 _move_dict_value_to_device(batch_x,
                                            batch_y,
                                            device=self._model_device)
                 prediction = self._data_forward(self.model, batch_x)
                 loss = self._compute_loss(prediction, batch_y)
                 avg_loss += loss.item()
                 self._grad_backward(loss)
                 self._update()
                 self._summary_writer.add_scalar("loss",
                                                 loss.item(),
                                                 global_step=self.step)
                 for name, param in self.model.named_parameters():
                     if param.requires_grad:
                         self._summary_writer.add_scalar(
                             name + "_mean",
                             param.mean(),
                             global_step=self.step)
                         # self._summary_writer.add_scalar(name + "_std", param.std(), global_step=self.step)
                         # self._summary_writer.add_scalar(name + "_grad_sum", param.sum(), global_step=self.step)
                 if (self.step + 1) % self.print_every == 0:
                     pbar.set_postfix_str("loss:{0:<6.5f}".format(
                         avg_loss / self.print_every))
                     avg_loss = 0
                     pbar.update(self.print_every)
                 self.step += 1
                 if self.validate_every > 0 and self.step % self.validate_every == 0 \
                         and self.dev_data is not None:
                     eval_res = self._do_validation(epoch=epoch,
                                                    step=self.step)
                     eval_str = "Epoch {}/{}. Step:{}/{}. ".format(epoch, self.n_epochs, self.step, total_steps) + \
                                self.tester._format_eval_results(eval_res)
                     pbar.write(eval_str)
             if self.validate_every < 0 and self.dev_data:
                 eval_res = self._do_validation(epoch=epoch, step=self.step)
                 eval_str = "Epoch {}/{}. Step:{}/{}. ".format(epoch, self.n_epochs, self.step, total_steps) + \
                            self.tester._format_eval_results(eval_res)
                 pbar.write(eval_str)
             if epoch != self.n_epochs:
                 data_iterator = Batch(self.train_data,
                                       batch_size=self.batch_size,
                                       sampler=self.sampler,
                                       as_numpy=False)
         pbar.close()
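
A note on the progress-bar pattern used in _tqdm_train: the bar is advanced in chunks of print_every rather than once per step, and the running loss is reported through set_postfix_str. A minimal self-contained version of that pattern with plain tqdm, outside fastNLP:

from tqdm import tqdm

total_steps, print_every = 100, 10
avg_loss = 0.0
with tqdm(total=total_steps, postfix='loss:{0:<6.5f}', leave=False, dynamic_ncols=True) as pbar:
    for step in range(total_steps):
        avg_loss += 0.5                                            # stand-in for loss.item()
        if (step + 1) % print_every == 0:
            pbar.set_postfix_str("loss:{0:<6.5f}".format(avg_loss / print_every))
            pbar.update(print_every)                               # advance the bar in chunks
            avg_loss = 0.0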
Example #6
    def predict(self, data: DataSet, seq_len_field_name=None):
        r"""用已经训练好的模型进行inference.

        :param fastNLP.DataSet data: 待预测的数据集
        :param str seq_len_field_name: 表示序列长度信息的field名字
        :return: dict dict里面的内容为模型预测的结果
        """
        if not isinstance(data, DataSet):
            raise ValueError("Only Dataset class is allowed, not {}.".format(
                type(data)))
        if seq_len_field_name is not None and seq_len_field_name not in data.field_arrays:
            raise ValueError("Field name {} not found in DataSet {}.".format(
                seq_len_field_name, data))

        prev_training = self.network.training
        self.network.eval()
        network_device = _get_model_device(self.network)
        batch_output = defaultdict(list)
        data_iterator = DataSetIter(data,
                                    batch_size=self.batch_size,
                                    sampler=SequentialSampler(),
                                    as_numpy=False)

        if hasattr(self.network, "predict"):
            predict_func = self.network.predict
        else:
            predict_func = self.network.forward

        with torch.no_grad():
            for batch_x, _ in data_iterator:
                _move_dict_value_to_device(batch_x, _, device=network_device)
                refined_batch_x = _build_args(predict_func, **batch_x)
                prediction = predict_func(**refined_batch_x)

                if seq_len_field_name is not None:
                    seq_lens = batch_x[seq_len_field_name].tolist()

                for key, value in prediction.items():
                    value = value.cpu().numpy()
                    if len(value.shape) == 1 or (len(value.shape) == 2
                                                 and value.shape[1] == 1):
                        batch_output[key].extend(value.tolist())
                    else:
                        if seq_len_field_name is not None:
                            tmp_batch = []
                            for idx, seq_len in enumerate(seq_lens):
                                tmp_batch.append(value[idx, :seq_len])
                            batch_output[key].extend(tmp_batch)
                        else:
                            batch_output[key].append(value)

        self.network.train(prev_training)
        return batch_output
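
predict() passes batch_x through _build_args before calling the model, so fields the forward/predict function does not declare are silently dropped. A minimal sketch of that filtering, assuming it simply keeps the keyword arguments named in the target function's signature (the real helper is more involved):

import inspect

def build_args(func, **kwargs):
    # hypothetical stand-in for fastNLP's _build_args: keep only the
    # kwargs that appear as parameters of `func`
    names = set(inspect.signature(func).parameters)
    return {k: v for k, v in kwargs.items() if k in names}

def forward(words, seq_len):
    return {"pred": words}

refined = build_args(forward, words=[1, 2, 3], seq_len=3, target=0)
print(refined)   # {'words': [1, 2, 3], 'seq_len': 3} -- 'target' was dropped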
Example #7
def produceCandidateTripleSlow(raw_phrase, Candidate_phrases, model,
                               Candidate_hpos_sub, threshold):
    """
    Use BERT to decide which phrase in Candidate_phrases is semantically closest to raw_phrase; based on the maximum score; intended for processing a single phrase at a time
    """
    from fastNLP.core.utils import _move_dict_value_to_device
    from fastNLP.core.utils import _get_model_device
    from fastNLP import DataSet
    from fastNLP import DataSetIter
    from my_bert_match import addWordPiece, addSeqlen, addWords, processItem, processNum
    p_Candidate_phrases = [
        raw_phrase + "::" + item for item in Candidate_phrases
    ]
    Candidate_dataset = DataSet({"raw_words": p_Candidate_phrases})
    Candidate_dataset.apply(addWords, new_field_name="p_words")
    Candidate_dataset.apply(addWordPiece, new_field_name="t_words")
    Candidate_dataset.apply(processItem, new_field_name="word_pieces")
    Candidate_dataset.apply(processNum, new_field_name="word_nums")
    Candidate_dataset.apply(addSeqlen, new_field_name="seq_len")
    Candidate_dataset.field_arrays["word_pieces"].is_input = True
    Candidate_dataset.field_arrays["seq_len"].is_input = True
    Candidate_dataset.field_arrays["word_nums"].is_input = True
    test_batch = DataSetIter(batch_size=10,
                             dataset=Candidate_dataset,
                             sampler=None)

    outputs = []
    for batch_x, batch_y in test_batch:
        _move_dict_value_to_device(batch_x,
                                   batch_y,
                                   device=_get_model_device(model))
        outputs.append(
            model.forward(batch_x["word_pieces"], batch_x["word_nums"],
                          batch_x["seq_len"])['pred'])
    outputs = torch.cat(outputs)
    outputs = torch.nn.functional.softmax(outputs,
                                          dim=1).cpu().detach().numpy()

    results_2 = np.array([item[2] for item in outputs])
    results_1 = np.array([item[1] for item in outputs])

    # if an exact match is already found here, return it directly
    if max(results_2) >= threshold:
        return Candidate_hpos_sub[int(
            np.argmax(results_2))], max(results_2), "2"

    if max(results_1) >= threshold:
        return Candidate_hpos_sub[int(
            np.argmax(results_1))], max(results_1), "1"

    return "None", None, "0"
Example #8
    def _print_train(self):
        epoch = 1
        start = time.time()
        while epoch <= self.n_epochs:

            data_iterator = Batch(self.train_data,
                                  batch_size=self.batch_size,
                                  sampler=self.sampler,
                                  as_numpy=False)

            for batch_x, batch_y in data_iterator:
                # TODO: this may break if the user changes the device of the prediction inside the model
                _move_dict_value_to_device(batch_x,
                                           batch_y,
                                           device=self._model_device)
                prediction = self._data_forward(self.model, batch_x)
                loss = self._compute_loss(prediction, batch_y)
                self._grad_backward(loss)
                self._update()
                self._summary_writer.add_scalar("loss",
                                                loss.item(),
                                                global_step=self.step)
                for name, param in self.model.named_parameters():
                    if param.requires_grad:
                        self._summary_writer.add_scalar(name + "_mean",
                                                        param.mean(),
                                                        global_step=self.step)
                        # self._summary_writer.add_scalar(name + "_std", param.std(), global_step=self.step)
                        # self._summary_writer.add_scalar(name + "_grad_sum", param.sum(), global_step=self.step)
                if self.print_every > 0 and self.step % self.print_every == 0:
                    end = time.time()
                    diff = timedelta(seconds=round(end - start))
                    print_output = "[epoch: {:>3} step: {:>4}] train loss: {:>4.6} time:  {}".format(
                        epoch, self.step, loss.data, diff)
                    print(print_output)

                if (self.validate_every > 0
                        and self.step % self.validate_every == 0
                        and self.dev_data is not None):
                    self._do_validation(epoch=epoch, step=self.step)

                self.step += 1

            # when validate_every > 0, in-loop validation replaces the end-of-epoch validation
            if self.dev_data and self.validate_every <= 0:
                self._do_validation(epoch=epoch, step=self.step)
            epoch += 1
Example #9
    def predict(self, data: DataSet, seq_len_field_name=None):
        r"""
        """
        if not isinstance(data, DataSet):
            raise ValueError(
                "Only Dataset class is allowed, not {}.".format(type(data)))
        if seq_len_field_name is not None and seq_len_field_name not in data.field_arrays:
            raise ValueError("Field name {} not found in DataSet {}.".format(
                seq_len_field_name, data))

        self.network.eval()  # self.network.module for multi-GPU
        network_device = _get_model_device(self.network)
        batch_output = defaultdict(list)
        data_iterator = DataSetIter(
            data, batch_size=self.batch_size, sampler=SequentialSampler(), as_numpy=False)

        # fall back to self.network.module.predict for multi-GPU (DataParallel) models
        try:
            predict_func = self.network.predict
        except AttributeError:  # also covers torch's ModuleAttributeError
            predict_func = self.network.module.predict

        with torch.no_grad():
            for batch_x, _ in tqdm(data_iterator, total=len(data_iterator)):
                _move_dict_value_to_device(batch_x, _, device=network_device)
                refined_batch_x = _build_args(predict_func, **batch_x)
                prediction = predict_func(**refined_batch_x)
                if seq_len_field_name is not None:
                    seq_lens = batch_x[seq_len_field_name].tolist()

                for key, value in prediction.items():
                    value = value.cpu().numpy()
                    if len(value.shape) == 1 or (
                            len(value.shape) == 2 and value.shape[1] == 1):
                        batch_output[key].extend(value.tolist())
                    else:
                        if seq_len_field_name is not None:
                            tmp_batch = []
                            for idx, seq_len in enumerate(seq_lens):
                                tmp_batch.append(value[idx, :seq_len])
                            batch_output[key].extend(tmp_batch)
                        else:
                            batch_output[key].append(value)
        return batch_output
Example #10
def produceCandidateTriple(Candidate_hpos_sub_total, model, hpo_tree,
                           threshold):
    """
    Use BERT to decide which phrase in Candidate_phrases is semantically closest to raw_phrase; based on the maximum score
    :param Candidate_hpos_sub_total: nested list of extracted phrases together with their candidate HPO terms
    :param model:
    :param hpo_tree:
    :param threshold: threshold applied to the model's output scores
    :return:
    """
    from fastNLP.core.utils import _move_dict_value_to_device
    from fastNLP.core.utils import _get_model_device
    from fastNLP import DataSet
    from fastNLP import DataSetIter
    from my_bert_match import addWordPiece, addSeqlen, addWords, processItem, processNum
    p_Candidate_phrases = []
    phrase_nums_per_hpo = []
    Candidate_hpos = []
    for raw_phrase, Candidate_phrase, Candidate_hpos_sub in Candidate_hpos_sub_total:
        p_Candidate_phrases.extend(
            [raw_phrase + "::" + item for item in Candidate_phrase])
        phrase_nums_per_hpo.append(len(Candidate_phrase))
        Candidate_hpos.append(Candidate_hpos_sub)
    Candidate_dataset = DataSet({"raw_words": p_Candidate_phrases})
    Candidate_dataset.apply(addWords, new_field_name="p_words")
    Candidate_dataset.apply(addWordPiece, new_field_name="t_words")
    Candidate_dataset.apply(processItem, new_field_name="word_pieces")
    Candidate_dataset.apply(processNum, new_field_name="word_nums")
    Candidate_dataset.apply(addSeqlen, new_field_name="seq_len")
    Candidate_dataset.field_arrays["word_pieces"].is_input = True
    Candidate_dataset.field_arrays["seq_len"].is_input = True
    Candidate_dataset.field_arrays["word_nums"].is_input = True
    test_batch = DataSetIter(batch_size=128,
                             dataset=Candidate_dataset,
                             sampler=None)

    outputs = []
    for batch_x, batch_y in test_batch:
        _move_dict_value_to_device(batch_x,
                                   batch_y,
                                   device=_get_model_device(model))
        outputs.append(
            model.forward(batch_x["word_pieces"], batch_x["word_nums"],
                          batch_x["seq_len"])['pred'])
    outputs = torch.cat(outputs)
    outputs = torch.nn.functional.softmax(outputs,
                                          dim=1).cpu().detach().numpy()
    # print(outputs.size)
    results_2 = np.array([item[2] for item in outputs])
    results_1 = np.array([item[1] for item in outputs])

    # group the results back per query phrase
    count = 0
    index = 0
    ans = []
    for group_num in phrase_nums_per_hpo:
        g_results_2 = results_2[index:index + group_num]
        g_results_1 = results_1[index:index + group_num]
        Candidate_hpos_sub = Candidate_hpos[count]
        index += group_num
        count += 1
        # if an exact match is already found in this group, return it directly
        if max(g_results_2) >= threshold:
            ans.append([
                Candidate_hpos_sub[int(np.argmax(g_results_2))],
                max(g_results_2), "2"
            ])
            continue
        if max(g_results_1) >= threshold:
            ans.append([
                Candidate_hpos_sub[int(np.argmax(g_results_1))],
                max(g_results_1), "1"
            ])
            continue
        ans.append(["None", None, "0"])
    return ans
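
The grouping logic at the end of produceCandidateTriple can be read in isolation: class probabilities for all candidate pairs are concatenated, then split back into per-phrase groups of known sizes and reduced with a thresholded argmax. A self-contained sketch of that split-and-argmax step:

import numpy as np

scores = np.array([0.1, 0.9, 0.3, 0.2, 0.8])    # e.g. class-2 probabilities for 5 candidate pairs
group_sizes = [2, 3]                             # number of candidates per query phrase
threshold = 0.5

index, picks = 0, []
for size in group_sizes:
    group = scores[index:index + size]
    index += size
    if group.max() >= threshold:
        picks.append(int(np.argmax(group)))      # best candidate within this group
    else:
        picks.append(None)                       # nothing above the threshold
print(picks)                                     # [1, 2]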
Example #11
def _check_code(dataset,
                model,
                losser,
                metrics,
                batch_size=DEFAULT_CHECK_BATCH_SIZE,
                dev_data=None,
                metric_key=None,
                check_level=0):
    # check the get_loss method
    model_device = model.parameters().__next__().device

    batch = Batch(dataset=dataset,
                  batch_size=batch_size,
                  sampler=SequentialSampler())
    for batch_count, (batch_x, batch_y) in enumerate(batch):
        _move_dict_value_to_device(batch_x, batch_y, device=model_device)
        # forward check
        if batch_count == 0:
            info_str = ""
            input_fields = _get_value_info(batch_x)
            target_fields = _get_value_info(batch_y)
            if len(input_fields) > 0:
                info_str += "input fields after batch(if batch size is {}):\n".format(
                    batch_size)
                info_str += "\n".join(input_fields)
                info_str += '\n'
            else:
                raise RuntimeError("There is no input field.")
            if len(target_fields) > 0:
                info_str += "target fields after batch(if batch size is {}):\n".format(
                    batch_size)
                info_str += "\n".join(target_fields)
                info_str += '\n'
            else:
                info_str += 'There is no target field.'
            print(info_str)
            _check_forward_error(forward_func=model.forward,
                                 dataset=dataset,
                                 batch_x=batch_x,
                                 check_level=check_level)

        refined_batch_x = _build_args(model.forward, **batch_x)
        pred_dict = model(**refined_batch_x)
        func_signature = get_func_signature(model.forward)
        if not isinstance(pred_dict, dict):
            raise TypeError(
                f"The return value of {func_signature} should be `dict`, not `{type(pred_dict)}`."
            )

        # loss check
        try:
            loss = losser(pred_dict, batch_y)
            # check loss output
            if batch_count == 0:
                if not isinstance(loss, torch.Tensor):
                    raise TypeError(
                        f"The return value of {get_func_signature(losser.get_loss)} should be `torch.Tensor`, "
                        f"but got `{type(loss)}`.")
                if len(loss.size()) != 0:
                    raise ValueError(
                        f"The size of return value of {get_func_signature(losser.get_loss)} is {loss.size()}, "
                        f"should be torch.size([])")
            loss.backward()
        except CheckError as e:
            # TODO: another error raised if CheckError caught
            pre_func_signature = get_func_signature(model.forward)
            _check_loss_evaluate(prev_func_signature=pre_func_signature,
                                 func_signature=e.func_signature,
                                 check_res=e.check_res,
                                 pred_dict=pred_dict,
                                 target_dict=batch_y,
                                 dataset=dataset,
                                 check_level=check_level)
        model.zero_grad()
        if batch_count + 1 >= DEFAULT_CHECK_NUM_BATCH:
            break

    if dev_data is not None:
        tester = Tester(data=dataset[:batch_size * DEFAULT_CHECK_NUM_BATCH],
                        model=model,
                        metrics=metrics,
                        batch_size=batch_size,
                        verbose=-1)
        evaluate_results = tester.test()
        _check_eval_results(metrics=evaluate_results,
                            metric_key=metric_key,
                            metric_list=metrics)
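
Two of the checks in _check_code are worth seeing on their own: the forward pass must return a dict, and the loss must be a scalar tensor of size torch.Size([]). A tiny illustration of what passes and what is rejected:

import torch

pred_dict = {"pred": torch.randn(4, 3)}
loss = torch.randn(4, 3).mean()            # reduction yields a 0-dim (scalar) tensor

assert isinstance(pred_dict, dict)
assert isinstance(loss, torch.Tensor) and loss.size() == torch.Size([])

bad_loss = torch.randn(4)                  # per-sample losses, size torch.Size([4])
assert bad_loss.size() != torch.Size([])   # this is the shape _check_code rejects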
Example #12
    def train_shared(self, pbar=None, max_step=None, dag=None):
        """Train the language model for 400 steps of minibatches of 64
        examples.

        Args:
            max_step: Used to run extra training steps as a warm-up.
            dag: If not None, is used instead of calling sample().

        BPTT is truncated at 35 timesteps.

        For each weight update, gradients are estimated by sampling M models
        from the fixed controller policy, and averaging their gradients
        computed on a batch of training data.
        """
        model = self.shared
        model.train()
        self.controller.eval()

        hidden = self.shared.init_hidden(self.batch_size)

        abs_max_grad = 0
        abs_max_hidden_norm = 0
        step = 0
        raw_total_loss = 0
        total_loss = 0
        train_idx = 0
        avg_loss = 0
        data_iterator = Batch(self.train_data,
                              batch_size=self.batch_size,
                              sampler=self.sampler,
                              as_numpy=False,
                              prefetch=self.prefetch)

        for batch_x, batch_y in data_iterator:
            _move_dict_value_to_device(batch_x,
                                       batch_y,
                                       device=self._model_device)
            indices = data_iterator.get_batch_indices()
            # negative sampling; replace unknown; re-weight batch_y
            self.callback_manager.on_batch_begin(batch_x, batch_y, indices)
            # prediction = self._data_forward(self.model, batch_x)

            dags = self.controller.sample(1)
            inputs, targets = batch_x, batch_y
            # self.callback_manager.on_loss_begin(batch_y, prediction)
            loss, hidden, extra_out = self.get_loss(inputs, targets, hidden,
                                                    dags)
            hidden.detach_()

            avg_loss += loss.item()

            # Is loss NaN or inf? requires_grad = False
            self.callback_manager.on_backward_begin(loss, self.model)
            self._grad_backward(loss)
            self.callback_manager.on_backward_end(self.model)

            self._update()
            self.callback_manager.on_step_end(self.optimizer)

            if (self.step + 1) % self.print_every == 0:
                if self.use_tqdm:
                    print_output = "loss:{0:<6.5f}".format(avg_loss /
                                                           self.print_every)
                    pbar.update(self.print_every)
                else:
                    end = time.time()
                    diff = timedelta(seconds=round(end - start))
                    print_output = "[epoch: {:>3} step: {:>4}] train loss: {:>4.6} time: {}".format(
                        epoch, self.step, avg_loss, diff)
                pbar.set_postfix_str(print_output)
                avg_loss = 0
            self.step += 1
            step += 1
            self.shared_step += 1
            self.callback_manager.on_batch_end()
Example #13
    def _train(self):
        if not self.use_tqdm:
            from fastNLP.core.utils import pseudo_tqdm as inner_tqdm
        else:
            inner_tqdm = tqdm
        self.step = 0
        start = time.time()
        data_iterator = Batch(self.train_data,
                              batch_size=self.batch_size,
                              sampler=self.sampler,
                              as_numpy=False)
        total_steps = data_iterator.num_batches * self.n_epochs
        with inner_tqdm(total=total_steps,
                        postfix='loss:{0:<6.5f}',
                        leave=False,
                        dynamic_ncols=True) as pbar:
            avg_loss = 0
            for epoch in range(1, self.n_epochs + 1):
                pbar.set_description_str(
                    desc="Epoch {}/{}".format(epoch, self.n_epochs))
                # early stopping
                self.callback_manager.before_epoch(epoch, self.n_epochs)
                for batch_x, batch_y in data_iterator:
                    indices = data_iterator.get_batch_indices()
                    # negative sampling; replace unknown; re-weight batch_y
                    self.callback_manager.before_batch(batch_x, batch_y,
                                                       indices)
                    _move_dict_value_to_device(batch_x,
                                               batch_y,
                                               device=self._model_device)
                    prediction = self._data_forward(self.model, batch_x)

                    # edit prediction
                    self.callback_manager.before_loss(batch_y, prediction)
                    loss = self._compute_loss(prediction, batch_y)
                    avg_loss += loss.item()

                    # Is loss NaN or inf? requires_grad = False
                    self.callback_manager.before_backward(loss, self.model)
                    self._grad_backward(loss)
                    # gradient clipping
                    self.callback_manager.after_backward(self.model)

                    self._update()
                    # lr scheduler; lr_finder; one_cycle
                    self.callback_manager.after_step(self.optimizer)

                    self._summary_writer.add_scalar("loss",
                                                    loss.item(),
                                                    global_step=self.step)
                    for name, param in self.model.named_parameters():
                        if param.requires_grad:
                            self._summary_writer.add_scalar(
                                name + "_mean",
                                param.mean(),
                                global_step=self.step)
                            # self._summary_writer.add_scalar(name + "_std", param.std(), global_step=self.step)
                            # self._summary_writer.add_scalar(name + "_grad_sum", param.sum(), global_step=self.step)
                    if (self.step + 1) % self.print_every == 0:
                        if self.use_tqdm:
                            print_output = "loss:{0:<6.5f}".format(
                                avg_loss / self.print_every)
                            pbar.update(self.print_every)
                        else:
                            end = time.time()
                            diff = timedelta(seconds=round(end - start))
                            print_output = "[epoch: {:>3} step: {:>4}] train loss: {:>4.6} time: {}".format(
                                epoch, self.step, avg_loss, diff)
                        pbar.set_postfix_str(print_output)
                        avg_loss = 0
                    self.step += 1
                    # do nothing
                    self.callback_manager.after_batch()

                    if ((self.validate_every > 0 and self.step % self.validate_every == 0) or
                        (self.validate_every < 0 and self.step % len(data_iterator) == 0)) \
                            and self.dev_data is not None:
                        eval_res = self._do_validation(epoch=epoch,
                                                       step=self.step)
                        eval_str = "Evaluation at Epoch {}/{}. Step:{}/{}. ".format(epoch, self.n_epochs, self.step,
                                                                                    total_steps) + \
                                   self.tester._format_eval_results(eval_res)
                        pbar.write(eval_str)

                # if self.validate_every < 0 and self.dev_data:
                #     eval_res = self._do_validation(epoch=epoch, step=self.step)
                #     eval_str = "Epoch {}/{}. Step:{}/{}. ".format(epoch, self.n_epochs, self.step, total_steps) + \
                #                self.tester._format_eval_results(eval_res)
                #     pbar.write(eval_str)
                if epoch != self.n_epochs:
                    data_iterator = Batch(self.train_data,
                                          batch_size=self.batch_size,
                                          sampler=self.sampler,
                                          as_numpy=False)
                # lr decay; early stopping
                self.callback_manager.after_epoch(epoch, self.n_epochs,
                                                  self.optimizer)
            pbar.close()
Example #14
File: trainer.py  Project: wzhystar/fastNLP
    def _train(self):
        if not self.use_tqdm:
            from fastNLP.core.utils import pseudo_tqdm as inner_tqdm
        else:
            inner_tqdm = tqdm
        self.step = 0
        start = time.time()
        total_steps = (len(self.train_data) // self.batch_size + int(
            len(self.train_data) % self.batch_size != 0)) * self.n_epochs
        with inner_tqdm(total=total_steps, postfix='loss:{0:<6.5f}', leave=False, dynamic_ncols=True) as pbar:
            avg_loss = 0
            data_iterator = Batch(self.train_data, batch_size=self.batch_size, sampler=self.sampler, as_numpy=False,
                                  prefetch=self.prefetch)
            for epoch in range(1, self.n_epochs+1):
                pbar.set_description_str(desc="Epoch {}/{}".format(epoch, self.n_epochs))
                # early stopping
                self.callback_manager.on_epoch_begin(epoch, self.n_epochs)
                for batch_x, batch_y in data_iterator:
                    _move_dict_value_to_device(batch_x, batch_y, device=self._model_device)
                    indices = data_iterator.get_batch_indices()
                    # negative sampling; replace unknown; re-weight batch_y
                    self.callback_manager.on_batch_begin(batch_x, batch_y, indices)
                    prediction = self._data_forward(self.model, batch_x)

                    # edit prediction
                    self.callback_manager.on_loss_begin(batch_y, prediction)
                    loss = self._compute_loss(prediction, batch_y)
                    avg_loss += loss.item()

                    # Is loss NaN or inf? requires_grad = False
                    self.callback_manager.on_backward_begin(loss, self.model)
                    self._grad_backward(loss)
                    self.callback_manager.on_backward_end(self.model)

                    self._update()
                    self.callback_manager.on_step_end(self.optimizer)

                    if (self.step+1) % self.print_every == 0:
                        if self.use_tqdm:
                            print_output = "loss:{0:<6.5f}".format(avg_loss / self.print_every)
                            pbar.update(self.print_every)
                        else:
                            end = time.time()
                            diff = timedelta(seconds=round(end - start))
                            print_output = "[epoch: {:>3} step: {:>4}] train loss: {:>4.6} time: {}".format(
                                epoch, self.step, avg_loss, diff)
                        pbar.set_postfix_str(print_output)
                        avg_loss = 0
                    self.step += 1
                    self.callback_manager.on_batch_end()

                    if ((self.validate_every > 0 and self.step % self.validate_every == 0) or
                        (self.validate_every < 0 and self.step % len(data_iterator) == 0)) \
                            and self.dev_data is not None:
                        eval_res = self._do_validation(epoch=epoch, step=self.step)
                        eval_str = "Evaluation at Epoch {}/{}. Step:{}/{}. ".format(epoch, self.n_epochs, self.step,
                                                                                    total_steps) + \
                                   self.tester._format_eval_results(eval_res)
                        pbar.write(eval_str)

                # ================= mini-batch end ==================== #

                # lr decay; early stopping
                self.callback_manager.on_epoch_end(epoch, self.n_epochs, self.optimizer)
            # =============== epochs end =================== #
            pbar.close()
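
The total_steps expression at the top of this version is a per-epoch ceiling division; a quick self-contained check of the arithmetic:

import math

n_samples, batch_size, n_epochs = 103, 16, 10
steps_per_epoch = n_samples // batch_size + int(n_samples % batch_size != 0)
assert steps_per_epoch == math.ceil(n_samples / batch_size)   # 7 batches per epoch
total_steps = steps_per_epoch * n_epochs                      # 70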
Example #15
def train():
    n_epochs = 10
    train_set = data_set_loader._load('../models/all4bert_new_triple.txt')
    train_set, tmp_set = train_set.split(0.2)
    val_set, test_set = tmp_set.split(0.5)
    data_bundle = [train_set, val_set, test_set]

    for dataset in data_bundle:
        dataset.apply(addWords, new_field_name="p_words")
        dataset.apply(addWordPiece, new_field_name="t_words")
        dataset.apply(processItem, new_field_name="word_pieces")
        dataset.apply(processNum, new_field_name="word_nums")
        dataset.apply(addSeqlen, new_field_name="seq_len")
        dataset.apply(processTarget, new_field_name="target")

    for dataset in data_bundle:
        dataset.field_arrays["word_pieces"].is_input = True
        dataset.field_arrays["seq_len"].is_input = True
        dataset.field_arrays["word_nums"].is_input = True
        dataset.field_arrays["target"].is_target = True

    print("In total " + str(len(data_bundle)) + " datasets:")
    print("Trainset has " + str(len(train_set)) + " instances.")
    print("Validateset has " + str(len(val_set)) + " instances.")
    print("Testset has " + str(len(test_set)) + " instances.")
    train_set.print_field_meta()
    # print(train_set)
    from fastNLP.models.Mybert import BertForSentenceMatching
    from fastNLP import AccuracyMetric, DataSetIter

    from fastNLP.core.utils import _pseudo_tqdm as tqdm
    # note: this argument is the number of output classes
    model = BertForSentenceMatching(embed, 3)
    if torch.cuda.is_available():
        model = _move_model_to_device(model, device=0)
    # print(model)
    train_batch = DataSetIter(batch_size=16, dataset=train_set, sampler=None)
    optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)
    Lossfunc = torch.nn.CrossEntropyLoss()
    with tqdm(total=n_epochs,
              postfix='loss:{0:<6.5f}',
              leave=False,
              dynamic_ncols=True) as pbar:
        print_every = 10
        for epoch in range(1, n_epochs + 1):
            pbar.set_description_str(
                desc="Epoch {}/{}".format(epoch, n_epochs))
            avg_loss = 0
            step = 0
            for batch_x, batch_y in train_batch:
                step += 1
                _move_dict_value_to_device(batch_x,
                                           batch_y,
                                           device=_get_model_device(model))
                optimizer.zero_grad()
                output = model.forward(batch_x["word_pieces"],
                                       batch_x["word_nums"],
                                       batch_x["seq_len"])
                loss = Lossfunc(output['pred'], batch_y['target'])
                loss.backward()
                optimizer.step()
                avg_loss += loss.item()
                if step % print_every == 0:
                    avg_loss = float(avg_loss) / print_every
                    print_output = "[epoch: {:>3} step: {:>4}] train loss: {:>4.6}".format(
                        epoch, step, avg_loss)
                    pbar.update(print_every)
                    pbar.set_postfix_str(print_output)
                    avg_loss = 0
            metric = AccuracyMetric()
            val_batch = DataSetIter(batch_size=8,
                                    dataset=val_set,
                                    sampler=None)
            for batch_x, batch_y in val_batch:
                _move_dict_value_to_device(batch_x,
                                           batch_y,
                                           device=_get_model_device(model))
                output = model.predict(batch_x["word_pieces"],
                                       batch_x["word_nums"],
                                       batch_x["seq_len"])
                metric(output, batch_y)
            eval_result = metric.get_metric()
            print("ACC on Validate Set:", eval_result)
            from fastNLP.io import ModelSaver
            saver = ModelSaver("../models/bert_model_max_triple.pkl")
            saver.save_pytorch(model, param_only=False)
        pbar.close()
    metric = AccuracyMetric()
    test_batch = DataSetIter(batch_size=8, dataset=test_set, sampler=None)
    for batch_x, batch_y in test_batch:
        _move_dict_value_to_device(batch_x,
                                   batch_y,
                                   device=_get_model_device(model))
        output = model.predict(batch_x["word_pieces"], batch_x["word_nums"],
                               batch_x["seq_len"])
        metric(output, batch_y)
    eval_result = metric.get_metric()
    print("ACC on Test Set:", eval_result)
    from fastNLP.io import ModelSaver
    saver = ModelSaver("../models/bert_model_max_triple.pkl")
    saver.save_pytorch(model, param_only=False)
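
The manual validation and test loops above accumulate an AccuracyMetric over batches. Ignoring any seq_len masking fastNLP may apply, the quantity being reported is plain classification accuracy; a minimal sketch with class-index tensors:

import torch

pred = torch.tensor([2, 0, 1, 1])
target = torch.tensor([2, 0, 2, 1])
acc = (pred == target).float().mean().item()
print(acc)   # 0.75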