Ejemplo n.º 1
0
    def evaluate(self) -> None:
        """
        Evaluate the model on the test set and report the BLEU score.

        Runs generation over ``self.src_test_generator``, decodes each batch of
        hypotheses, computes BLEU against ``self.tgt_test``, and writes the
        decoded hypotheses to a file inside ``self.experiment_path``.
        """
        logging.info(
            f"Evaluating model with this configuration: \n {self.model.config}"
        )

        # generate predictions and calculate bleu score
        hyps = []
        self.model.eval()
        with torch.no_grad():
            for batch in tqdm(self.src_test_generator):
                batch = batch.to(self.device)
                translations = self.model.generate(input_ids=batch)
                # extend() instead of `hyps = hyps + decoded`: the latter
                # copies the whole accumulated list on every batch (quadratic
                # in the number of hypotheses).
                hyps.extend(
                    self.tokenizer.decode(translation,
                                          skip_special_tokens=True,
                                          clean_up_tokenization_spaces=False)
                    for translation in translations
                )
        bleu_score = calculate_bleu_score(hyps, self.tgt_test)
        logging.info(f"BLEU score on test data is: {bleu_score:.2f}")

        # write hypothesis to file
        # NOTE(review): ':' in the filename is illegal on Windows filesystems;
        # kept unchanged for compatibility with existing experiment artifacts
        # — confirm before running on Windows.
        hyps_path = os.path.join(self.experiment_path,
                                 f"model_test_hyps_bleu:{bleu_score:.2f}.txt")

        with open(hyps_path, "w") as file:
            # one buffered call instead of a per-sentence write loop
            file.writelines(sent + " \n" for sent in hyps)
        logging.info(f"Model hypothesis saved in {hyps_path}")
Ejemplo n.º 2
0
    def _evaluate_dev(
        self,
        dev_generator: Generator[torch.Tensor, None, None],
        tgt_dev: list,
        epoch: int,
    ) -> float:
        """
        Evaluate the parallel dev set after an epoch.

        Generates hypotheses for every batch in ``dev_generator``, computes
        BLEU against ``tgt_dev``, writes the hypotheses to an epoch-numbered
        file under ``self.experiment_path``, and logs the score.

        :param dev_generator: yields batched source token-id tensors
        :param tgt_dev: reference target sentences for BLEU
        :param epoch: zero-based epoch index (logged/filed as ``epoch+1``)
        :return: the BLEU score on the dev set
        """

        # evaluate parallel dev dataset
        hyps = []
        self.model.eval()
        with torch.no_grad():
            for batch in tqdm(dev_generator):
                batch = batch.to(self.device)
                translations = self.model.generate(input_ids=batch)
                # extend() instead of `hyps = hyps + decoded`: the latter
                # re-copies the accumulated list on every batch (quadratic).
                hyps.extend(
                    self.tokenizer.decode(translation,
                                          skip_special_tokens=True,
                                          clean_up_tokenization_spaces=False)
                    for translation in translations
                )
        bleu_score = calculate_bleu_score(hyps, tgt_dev)

        # write hypothesis to file
        hyps_path = os.path.join(self.experiment_path,
                                 f"epoch_{epoch+1}_dev_hyps.txt")
        with open(hyps_path, "w") as file:
            # one buffered call instead of a per-sentence write loop
            file.writelines(sent + " \n" for sent in hyps)
        self.logger.info(f"Model hypothesis saved in {hyps_path}")
        self.logger.info(
            f"BLEU score after epoch {epoch+1} is: {bleu_score:.2f}")

        return bleu_score
Ejemplo n.º 3
0
 def calc_generative_metrics(self, preds, target) -> dict:
     """Score generated *preds* against reference *target* via BLEU.

     Thin delegate to the module-level ``calculate_bleu_score`` helper.
     NOTE(review): the ``-> dict`` annotation may be wrong — other call
     sites in this file treat ``calculate_bleu_score``'s result as a
     scalar or a ``(score, report)`` pair; confirm the actual return type.
     """
     return calculate_bleu_score(preds, target)
Ejemplo n.º 4
0
                  checkpoint_dir='./checkpoints')
# Report whether a previous checkpoint exists before attempting to restore it.
if trainer.checkpoint_manager.latest_checkpoint:
    print("Restored from {}".format(
        trainer.checkpoint_manager.latest_checkpoint))
else:
    print("Initializing from scratch.")

# NOTE(review): the "Restored from ..." message above is printed BEFORE this
# restore call, and restore() runs unconditionally even when
# latest_checkpoint is None — presumably a harmless no-op in that case, but
# the log message can mislead; confirm intended ordering.
trainer.checkpoint.restore(trainer.checkpoint_manager.latest_checkpoint)


def do_translate(input_data):
    """Translate one ``(index, (source, target))`` item.

    Prints the item index for progress tracking, runs the module-level
    ``translate`` helper on the source sentence, and returns a dict with
    the source, reference target, and model output.
    """
    idx, pair = input_data
    src_sentence = pair[0]
    ref_sentence = pair[1]
    print(idx)
    hypothesis = translate(src_sentence, data_loader, trainer, SEQ_MAX_LEN_TARGET)
    return {'source': src_sentence, 'target': ref_sentence, 'output': hypothesis}


# Translate every test example and collect only the model outputs.
# (comprehension instead of a manual append loop; do_translate still
# prints each index as a side effect)
translated_data = [do_translate(test_data)['output'] for test_data in data]

# Persist one hypothesis per line. The original wrapped the join in a
# redundant str() call — ''.join already returns a str.
with open('translated_data', 'w') as f:
    f.write('\n'.join(translated_data))

score, report = calculate_bleu_score(target_path='translated_data',
                                     ref_path=target_data_path)