>>> wer_score = wer.compute(predictions=predictions, references=references)
    >>> print(wer_score)
    0.5
"""


class AddSpacesToPunctuation(tr.AbstractTransform):
    """Transform that pads selected punctuation characters with spaces.

    The affected characters are ``'`` ``.`` ``,`` ``:`` ``;`` ``?`` ``!``
    and ``&``.
    """

    def process_string(self, s: str):
        """Return ``s`` with every matched punctuation character surrounded
        by one space on each side.

        Adjacent punctuation characters therefore end up separated by more
        than one space; no whitespace is collapsed here.
        """
        punctuation_pattern = "(['.,:;?!&])"
        spaced_replacement = r" \1 "
        return re.sub(punctuation_pattern, spaced_replacement, s)


# Module-level preprocessing pipeline built with `tr.Compose`.
# The first step is the AddSpacesToPunctuation transform defined above in this
# file, which pads punctuation characters with spaces; the remaining steps are
# transforms taken from `tr` and are listed in the order they are passed to
# Compose.
# NOTE(review): `tr` is presumably `jiwer.transforms` (the import is not
# visible in this part of the file) -- confirm, and see that module for the
# exact semantics of each built-in step.
_transform = tr.Compose(
    [
        AddSpacesToPunctuation(),
        tr.RemoveMultipleSpaces(),
        tr.Strip(),
        tr.SentencesToListOfWords(),
        tr.RemoveEmptyStrings(),
    ]
)


class WER_punctuation(datasets.Metric):
    def _info(self):
        return datasets.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=datasets.Features(
                {
                    "predictions": datasets.Value("string", id="sequence"),
        def process_string(self, s: str):
            """Split ``s`` into a list of its individual characters."""
            return [*s]

        def process_list(self, inp: List[str]):
            """Flatten a list of sentences into a single list of characters.

            Each sentence is expanded with ``self.process_string``. When
            ``self.sentence_delimiter`` is neither ``None`` nor the empty
            string, it is inserted between consecutive sentences (never after
            the last one).
            """
            flattened = []
            for position, sentence in enumerate(inp):
                flattened += self.process_string(sentence)
                delimiter = self.sentence_delimiter
                # No usable delimiter configured: sentences are simply concatenated.
                if delimiter is None or delimiter == "":
                    continue
                # Only separate sentences; do not append a trailing delimiter.
                if position < len(inp) - 1:
                    flattened.append(delimiter)
            return flattened

    # Character-level pipeline for this branch: the final step is the
    # SentencesToListOfCharacters transform defined just above, constructed
    # with SENTENCE_DELIMITER.
    cer_transform = tr.Compose([
        tr.RemoveMultipleSpaces(),
        tr.Strip(),
        SentencesToListOfCharacters(SENTENCE_DELIMITER)
    ])
else:
    # Alternative pipeline built only from transforms provided by `tr`;
    # SENTENCE_DELIMITER is passed to tr.ReduceToSingleSentence instead.
    # NOTE(review): the condition that selects between the two branches is
    # outside the visible part of the file -- presumably a check on the
    # installed jiwer version; confirm.
    cer_transform = tr.Compose([
        tr.RemoveMultipleSpaces(),
        tr.Strip(),
        tr.ReduceToSingleSentence(SENTENCE_DELIMITER),
        tr.ReduceToListOfListOfChars(),
    ])

_CITATION = """\
@inproceedings{inproceedings,
    author = {Morris, Andrew and Maier, Viktoria and Green, Phil},
    year = {2004},
    month = {01},
Exemple #3
0
import jiwer.transforms as tr

# Public names exported by a star-import of this module.
# NOTE(review): "wer_standardize_contiguous" and "cer_default_transform" are
# not defined in the visible part of the file -- confirm they are defined
# further down.
__all__ = [
    "wer_default",
    "wer_contiguous",
    "wer_standardize",
    "wer_standardize_contiguous",
    "cer_default_transform",
]

################################################################################
# implement transformations for WER (and accompanying measures)

# Pipeline exported as `wer_default`: composes the three jiwer transforms
# below, in the order listed. See jiwer.transforms for the exact semantics of
# each step.
wer_default = tr.Compose([
    tr.RemoveMultipleSpaces(),
    tr.Strip(),
    tr.ReduceToListOfListOfWords(),
])

# Pipeline exported as `wer_contiguous`: the same steps as `wer_default`, with
# an additional tr.ReduceToSingleSentence() applied before the final
# ReduceToListOfListOfWords step.
wer_contiguous = tr.Compose([
    tr.RemoveMultipleSpaces(),
    tr.Strip(),
    tr.ReduceToSingleSentence(),
    tr.ReduceToListOfListOfWords(),
])

wer_standardize = tr.Compose([
    tr.ToLowerCase(),
    tr.ExpandCommonEnglishContractions(),
    tr.RemoveKaldiNonWords(),
    tr.RemoveWhiteSpace(replace_by_space=True),
Exemple #4
0
import Levenshtein

from typing import List, Mapping, Tuple, Union

import jiwer.transforms as tr

# Public names exported by a star-import of this module.
__all__ = ["wer", "mer", "wil", "wip", "compute_measures", "ops"]

################################################################################
# Implementation of the WER method, exposed publicly

# Private module-level pipeline composed from jiwer transforms, applied in the
# order listed. See jiwer.transforms for the exact semantics of each step.
# NOTE(review): presumably the default preprocessing used by the measures
# exported above -- the functions that consume it are outside this view.
_default_transform = tr.Compose(
    [
        tr.RemoveMultipleSpaces(),
        tr.Strip(),
        tr.SentencesToListOfWords(),
        tr.RemoveEmptyStrings(),
    ]
)

# Private module-level pipeline of four jiwer transforms, applied in the order
# listed; RemoveWhiteSpace is configured with replace_by_space=True.
# NOTE(review): how this pipeline is combined with `_default_transform` is not
# visible in this part of the file -- confirm against its callers.
_standardize_transform = tr.Compose(
    [
        tr.ToLowerCase(),
        tr.ExpandCommonEnglishContractions(),
        tr.RemoveKaldiNonWords(),
        tr.RemoveWhiteSpace(replace_by_space=True),
    ]
)


def ops(
Exemple #5
0
class SentencesToListOfCharacters(tr.AbstractTransform):
    """Transform that flattens sentences into one flat list of characters."""

    def process_string(self, s: str):
        """Split ``s`` into a list of its individual characters."""
        return [*s]

    def process_list(self, inp: List[str]):
        """Return the characters of every sentence in ``inp``, in order.

        The per-sentence character lists are concatenated directly; nothing
        is inserted between consecutive sentences.
        """
        return [
            char
            for sentence in inp
            for char in self.process_string(sentence)
        ]


# Character-level pipeline: two transforms from `tr`, followed by the
# SentencesToListOfCharacters transform defined above, which turns the
# sentences into one flat list of characters.
cer_transform = tr.Compose([
    tr.RemoveMultipleSpaces(),
    tr.Strip(),
    SentencesToListOfCharacters(),
])

# BibTeX entry for the paper this metric cites. The text is a runtime string
# and is kept verbatim (including the empty `pages` field).
_CITATION = """\
@inproceedings{inproceedings,
    author = {Morris, Andrew and Maier, Viktoria and Green, Phil},
    year = {2004},
    month = {01},
    pages = {},
    title = {From WER and RIL to MER and WIL: improved evaluation measures for connected speech recognition.}
}
"""

_DESCRIPTION = """\
Character error rate (CER) is a common metric of the performance of an automatic speech recognition system.