Example #1
    # requires `from time import time` plus the library's load_data and
    # get_metrics helpers in scope
    def evaluate(self, clean_file, corrupt_file):
        """
        Evaluate the corrector on a parallel clean/corrupt test set, e.g.

        clean_file = f"{DEFAULT_DATA_PATH}/traintest/clean.txt"
        corrupt_file = f"{DEFAULT_DATA_PATH}/traintest/corrupt.txt"
        """
        # zip over single-element lists runs the loop exactly once; the
        # shape is kept so additional file triples can be appended later
        for data_dir, clean_path, corrupt_path in zip([""], [clean_file],
                                                      [corrupt_file]):
            print(data_dir, clean_path, corrupt_path)
            test_data = load_data(data_dir, clean_path, corrupt_path)
            clean_data = [pair[0] for pair in test_data]
            corrupt_data = [pair[1] for pair in test_data]
            inference_st_time = time()
            predictions_data = self.correct_strings(corrupt_data)
            assert len(clean_data) == len(corrupt_data) == len(predictions_data)
            corr2corr, corr2incorr, incorr2corr, incorr2incorr = \
                get_metrics(clean_data, corrupt_data, predictions_data)

            print("total inference time for this data is: {:.4f} secs".format(
                time() - inference_st_time))
            print("###############################################")
            print("total token count: {}".format(corr2corr + corr2incorr +
                                                 incorr2corr + incorr2incorr))
            print(
                f"corr2corr:{corr2corr}, corr2incorr:{corr2incorr}, incorr2corr:{incorr2corr}, incorr2incorr:{incorr2incorr}"
            )
            print(
                f"accuracy is {(corr2corr+incorr2corr)/(corr2corr+corr2incorr+incorr2corr+incorr2incorr)}"
            )
            print(
                f"word correction rate is {(incorr2corr)/(incorr2corr+incorr2incorr)}"
            )
            print("###############################################")

        return
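For context, the four counters compare each predicted token against the clean and corrupt references: corr2corr counts tokens that were already correct and stayed correct, incorr2corr counts corrupted tokens that the model fixed, and so on. Below is a minimal sketch of that bookkeeping, assuming whitespace tokenization and length-aligned sentences; the actual get_metrics helper in the source library may differ:

def get_metrics_sketch(clean_data, corrupt_data, predictions_data):
    # token-level counters keyed on (was correct before?, is correct now?)
    corr2corr = corr2incorr = incorr2corr = incorr2incorr = 0
    for clean, corrupt, pred in zip(clean_data, corrupt_data, predictions_data):
        for c_tok, x_tok, p_tok in zip(clean.split(), corrupt.split(), pred.split()):
            was_correct = (x_tok == c_tok)
            is_correct = (p_tok == c_tok)
            if was_correct and is_correct:
                corr2corr += 1
            elif was_correct:
                corr2incorr += 1
            elif is_correct:
                incorr2corr += 1
            else:
                incorr2incorr += 1
    return corr2corr, corr2incorr, incorr2corr, incorr2incorr

Accuracy is then (corr2corr + incorr2corr) divided by the total token count, and the word correction rate is incorr2corr / (incorr2corr + incorr2incorr), matching the two ratios printed above.
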
Example #2

    # requires the library's load_data and model_inference helpers in scope
    def evaluate(self, clean_file, corrupt_file):
        """
        Evaluate the loaded model on a parallel clean/corrupt test set, e.g.

        clean_file = f"{DEFAULT_DATA_PATH}/traintest/clean.txt"
        corrupt_file = f"{DEFAULT_DATA_PATH}/traintest/corrupt.txt"
        """
        self.__model_status()
        # smaller batches on CPU keep memory use and latency manageable
        batch_size = 4 if self.device == "cpu" else 16
        # zip over single-element lists runs the loop exactly once; the
        # shape is kept so additional file triples can be appended later
        for data_dir, clean_path, corrupt_path in zip([""], [clean_file],
                                                      [corrupt_file]):
            print(data_dir, clean_path, corrupt_path)
            test_data = load_data(data_dir, clean_path, corrupt_path)
            _ = model_inference(self.model,
                                test_data,
                                topk=1,
                                DEVICE=self.device,
                                BATCH_SIZE=batch_size,
                                vocab_=self.vocab)
        return
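
A hypothetical call site for either variant; SpellChecker here stands in for whatever class the surrounding library actually exposes, while DEFAULT_DATA_PATH is the constant already used in the docstrings above:

checker = SpellChecker()  # assumed wrapper class exposing evaluate()
checker.evaluate(
    clean_file=f"{DEFAULT_DATA_PATH}/traintest/clean.txt",
    corrupt_file=f"{DEFAULT_DATA_PATH}/traintest/corrupt.txt",
)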