>>> wer_score = wer.compute(predictions=predictions, references=references) >>> print(wer_score) 0.5 """


# Transform that surrounds punctuation characters with spaces.
class AddSpacesToPunctuation(tr.AbstractTransform):
    # Return s with every occurrence of one of the characters
    # ' . , : ; ? ! & wrapped in one space on each side.
    def process_string(self, s: str):
        # The single capture group is re-emitted through \1 between two
        # literal spaces; any doubled spaces this produces are not cleaned
        # up here.
        return re.sub("(['.,:;?!&])", r" \1 ", s)


# Preprocessing pipeline: the punctuation padding defined above comes first,
# followed by four transforms from tr.
# NOTE(review): the behavior of the tr.* steps (collapse repeated spaces,
# strip, split into words, drop empty strings) and the list-order execution
# of tr.Compose are inferred from their names -- confirm against the jiwer
# version in use.
_transform = tr.Compose(
    [
        AddSpacesToPunctuation(),
        tr.RemoveMultipleSpaces(),
        tr.Strip(),
        tr.SentencesToListOfWords(),
        tr.RemoveEmptyStrings(),
    ]
)


# Metric class derived from datasets.Metric.
class WER_punctuation(datasets.Metric):
    # Return a datasets.MetricInfo assembled from the module-level
    # _DESCRIPTION, _CITATION and _KWARGS_DESCRIPTION values together with a
    # feature schema in which "predictions" is a string value.
    def _info(self):
        return datasets.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=datasets.Features(
                {
                    "predictions": datasets.Value("string", id="sequence"),
# Character-level splitting methods followed by the two cer_transform variants.
        # Return the characters of s as a list, one element per character.
        def process_string(self, s: str):
            return list(s)

        # Flatten a list of sentences into a single list of characters,
        # placing self.sentence_delimiter between consecutive sentences.
        def process_list(self, inp: List[str]):
            chars = []
            for sent_idx, sentence in enumerate(inp):
                chars.extend(self.process_string(sentence))
                # The delimiter is appended only when it is neither None nor
                # the empty string, and never after the final sentence.
                if self.sentence_delimiter is not None and self.sentence_delimiter != "" and sent_idx < len(
                        inp) - 1:
                    chars.append(self.sentence_delimiter)
            return chars

    # First variant: ends with SentencesToListOfCharacters, constructed with
    # SENTENCE_DELIMITER.
    # NOTE(review): presumably that is the class owning the two methods
    # above -- confirm against the class header.
    cer_transform = tr.Compose([
        tr.RemoveMultipleSpaces(),
        tr.Strip(),
        SentencesToListOfCharacters(SENTENCE_DELIMITER)
    ])
else:
    # Second variant: built only from transforms provided by tr, again
    # parameterized with SENTENCE_DELIMITER.
    # NOTE(review): the condition that selects between the two variants is
    # not shown next to this code -- confirm what it tests.
    cer_transform = tr.Compose([
        tr.RemoveMultipleSpaces(),
        tr.Strip(),
        tr.ReduceToSingleSentence(SENTENCE_DELIMITER),
        tr.ReduceToListOfListOfChars(),
    ])

# BibTeX entry (Morris, Maier and Green, 2004).
_CITATION = """\ @inproceedings{inproceedings, author = {Morris, Andrew and Maier, Viktoria and Green, Phil}, year = {2004}, month = {01},
import jiwer.transforms as tr

# Names listed as the public interface of this module.
__all__ = [
    "wer_default",
    "wer_contiguous",
    "wer_standardize",
    "wer_standardize_contiguous",
    "cer_default_transform",
]

################################################################################
# implement transformations for WER (and accompanying measures)

# Three-step pipeline ending in tr.ReduceToListOfListOfWords().
# NOTE(review): step semantics (collapse repeated spaces, strip, split each
# sentence into its own word list) are inferred from the transform names --
# confirm against jiwer.transforms.
wer_default = tr.Compose([
    tr.RemoveMultipleSpaces(),
    tr.Strip(),
    tr.ReduceToListOfListOfWords(),
])

# Same steps as wer_default with one addition: tr.ReduceToSingleSentence()
# runs before the final word-list step.
wer_contiguous = tr.Compose([
    tr.RemoveMultipleSpaces(),
    tr.Strip(),
    tr.ReduceToSingleSentence(),
    tr.ReduceToListOfListOfWords(),
])

# Pipeline that begins with text normalization steps; RemoveWhiteSpace is
# configured with replace_by_space=True.
wer_standardize = tr.Compose([
    tr.ToLowerCase(),
    tr.ExpandCommonEnglishContractions(),
    tr.RemoveKaldiNonWords(),
    tr.RemoveWhiteSpace(replace_by_space=True),
import Levenshtein

from typing import List, Mapping, Tuple, Union

import jiwer.transforms as tr

# Names listed as the public interface of this module.
__all__ = ["wer", "mer", "wil", "wip", "compute_measures", "ops"]

################################################################################
# Implementation of the WER method, exposed publicly

# Module-private pipeline of four tr transforms.
# NOTE(review): step semantics (collapse repeated spaces, strip, split into
# words, drop empty strings) are inferred from the transform names -- confirm
# against jiwer.transforms.
_default_transform = tr.Compose(
    [
        tr.RemoveMultipleSpaces(),
        tr.Strip(),
        tr.SentencesToListOfWords(),
        tr.RemoveEmptyStrings(),
    ]
)

# Module-private normalization pipeline; RemoveWhiteSpace is configured with
# replace_by_space=True. It contains no word-splitting step of its own.
_standardize_transform = tr.Compose(
    [
        tr.ToLowerCase(),
        tr.ExpandCommonEnglishContractions(),
        tr.RemoveKaldiNonWords(),
        tr.RemoveWhiteSpace(replace_by_space=True),
    ]
)


def ops(
# Transform that turns one or more sentences into a flat list of characters.
class SentencesToListOfCharacters(tr.AbstractTransform):
    # Return the characters of s as a list, one element per character.
    def process_string(self, s: str):
        return list(s)

    # Concatenate the characters of every sentence in inp, in order, into one
    # list. Nothing is inserted between sentences, so the last character of
    # one sentence is directly followed by the first character of the next.
    def process_list(self, inp: List[str]):
        chars = []
        for sentence in inp:
            chars.extend(self.process_string(sentence))
        return chars


# Pipeline of two tr transforms followed by the character splitter defined
# above.
# NOTE(review): RemoveMultipleSpaces / Strip semantics are inferred from
# their names -- confirm against jiwer.transforms.
cer_transform = tr.Compose([
    tr.RemoveMultipleSpaces(),
    tr.Strip(),
    SentencesToListOfCharacters(),
])

# BibTeX entry (Morris, Maier and Green, 2004).
_CITATION = """\ @inproceedings{inproceedings, author = {Morris, Andrew and Maier, Viktoria and Green, Phil}, year = {2004}, month = {01}, pages = {}, title = {From WER and RIL to MER and WIL: improved evaluation measures for connected speech recognition.} } """

# Description text for the character error rate metric.
_DESCRIPTION = """\ Character error rate (CER) is a common metric of the performance of an automatic speech recognition system.