def get_operations(
    truth: Union[str, List[str]],
    hypothesis: Union[str, List[str]],
    truth_transform: Union[tr.Compose, tr.AbstractTransform] = _default_transform,
    hypothesis_transform: Union[tr.Compose, tr.AbstractTransform] = _default_transform,
    **kwargs
):
    """
    Get the edit operations which transform one or more ground-truth sentences
    into one or more hypothesis sentences.

    The sentences can be given as a string (one sentence) or a list of strings
    (multiple sentences). The optional transforms are applied to the truth and
    hypothesis input before alignment.

    :param truth: the ground-truth sentence(s) as a string or list of strings
    :param hypothesis: the hypothesis sentence(s) as a string or list of strings
    :param truth_transform: the transformation to apply on the truths input
    :param hypothesis_transform: the transformation to apply on the hypothesis input
    :return: the edit operations aligning truth and hypothesis
    """
    # deal with old API
    if "standardize" in kwargs:
        truth = _standardize_transform(truth)
        hypothesis = _standardize_transform(hypothesis)
    if "words_to_filter" in kwargs:
        t = tr.RemoveSpecificWords(kwargs["words_to_filter"])
        truth = t(truth)
        hypothesis = t(hypothesis)

    # Preprocess truth and hypothesis
    truth, hypothesis = _preprocess(
        truth, hypothesis, truth_transform, hypothesis_transform
    )

    # Get the operation counts (#hits, #substitutions, #deletions, #insertions)
    operations = _get_editops(truth, hypothesis)

    return operations
def wer(
    truth: Union[str, List[str]],
    hypothesis: Union[str, List[str]],
    truth_transform: Union[tr.Compose, tr.AbstractTransform] = _default_transform,
    hypothesis_transform: Union[tr.Compose, tr.AbstractTransform] = _default_transform,
    **kwargs
) -> float:
    """
    Calculate the WER between between a set of ground-truth sentences and a set of
    hypothesis sentences.

    The set of sentences can be given as a string or a list of strings. A string
    input is assumed to be a single sentence. A list of strings is assumed to be
    multiple sentences. Each word in a sentence is separated by one or more spaces.
    A sentence is not expected to end with a specific token (such as a `.`). If
    the ASR does delimit sentences it is expected that these tokens are filtered out.

    The optional `transforms` arguments can be used to apply pre-processing to
    respectively the ground truth and hypotheses input. Note that the transform
    should ALWAYS include `SentencesToListOfWords`, as that is the expected input.

    :param truth: the ground-truth sentence(s) as a string or list of strings
    :param hypothesis: the hypothesis sentence(s) as a string or list of strings
    :param truth_transform: the transformation to apply on the truths input
    :param hypothesis_transform: the transformation to apply on the hypothesis input
    :return: the WER as a floating number between 0 and 1
    """
    # deal with old API
    if "standardize" in kwargs:
        truth = _standardize_transform(truth)
        hypothesis = _standardize_transform(hypothesis)
    if "words_to_filter" in kwargs:
        t = tr.RemoveSpecificWords(kwargs["words_to_filter"])
        truth = t(truth)
        hypothesis = t(hypothesis)

    # Apply transforms. By default, it collapses input to a list of words
    truth = truth_transform(truth)
    hypothesis = hypothesis_transform(hypothesis)

    # raise an error if the ground truth is empty
    # (fixed message grammar: was "cannot be an empty")
    if len(truth) == 0:
        raise ValueError("the ground truth cannot be empty")

    # tokenize each word into an integer, so the edit-distance routine can
    # compare single "characters" instead of whole words
    vocabulary = set(truth + hypothesis)
    word2char = dict(zip(vocabulary, range(len(vocabulary))))

    truth_chars = [chr(word2char[w]) for w in truth]
    hypothesis_chars = [chr(word2char[w]) for w in hypothesis]

    # now that the words are tokenized, we can do alignment
    distance = _edit_distance(truth_chars, hypothesis_chars)

    # and the WER is simply distance divided by the length of the truth
    n = len(truth_chars)
    error_rate = distance / n

    return error_rate
def compute_measures(
    truth: Union[str, List[str]],
    hypothesis: Union[str, List[str]],
    truth_transform: Union[tr.Compose, tr.AbstractTransform] = wer_default,
    hypothesis_transform: Union[tr.Compose, tr.AbstractTransform] = wer_default,
    **kwargs
) -> Dict[str, float]:
    """
    Calculate error measures between a set of ground-truth sentences and a set
    of hypothesis sentences.

    A string input is treated as a single sentence; a list of strings is treated
    as multiple sentences which are evaluated independently. Words within a
    sentence are separated by one or more spaces, and no terminating token
    (such as a `.`) is expected — filter such tokens out beforehand if the ASR
    system emits them.

    The optional transform arguments pre-process the ground truth and hypothesis
    input respectively. The default transform:

    1) strips leading/trailing whitespace,
    2) collapses contiguous spaces to a single space, and
    3) splits each sentence into a list of words.

    Any custom transform must likewise reduce the input to at least one list of
    words so the edit distance can be computed.

    :param truth: the ground-truth sentence(s) as a string or list of strings
    :param hypothesis: the hypothesis sentence(s) as a string or list of strings
    :param truth_transform: the transformation to apply on the truths input
    :param hypothesis_transform: the transformation to apply on the hypothesis input
    :return: a dict with WER, MER, WIP and WIL measures as floating point numbers
    """
    # Handle the deprecated keyword arguments of the old API.
    if "standardize" in kwargs:
        warnings.warn(
            UserWarning(
                "keyword argument `standardize` is deprecated. "
                "Please use `truth_transform=jiwer.transformations.wer_standardize` and"
                " `hypothesis_transform=jiwer.transformations.wer_standardize` instead"
            )
        )
        truth_transform = wer_standardize
        hypothesis_transform = wer_standardize
    if "words_to_filter" in kwargs:
        warnings.warn(
            UserWarning(
                "keyword argument `words_to_filter` is deprecated. "
                "Please compose your own transform with `jiwer.transforms.RemoveSpecificWords"
            )
        )
        word_filter = tr.RemoveSpecificWords(kwargs["words_to_filter"])
        truth = word_filter(truth)
        hypothesis = word_filter(hypothesis)

    # Normalise string input to a one-element list so both inputs are lists.
    if isinstance(truth, str):
        truth = [truth]
    if isinstance(hypothesis, str):
        hypothesis = [hypothesis]
    if any(len(sentence) == 0 for sentence in truth):
        raise ValueError("one or more groundtruths are empty strings")

    # Apply the configured transforms to both inputs.
    truth, hypothesis = _preprocess(
        truth, hypothesis, truth_transform, hypothesis_transform
    )

    # Accumulate hits, substitutions, deletions and insertions over all
    # sentence pairs, together with the token totals on each side.
    total_hits = 0
    total_substitutions = 0
    total_deletions = 0
    total_insertions = 0
    num_truth_tokens = 0
    num_hypothesis_tokens = 0

    for truth_sentence, hypothesis_sentence in zip(truth, hypothesis):
        hits, substitutions, deletions, insertions = _get_operation_counts(
            truth_sentence, hypothesis_sentence
        )
        total_hits += hits
        total_substitutions += substitutions
        total_deletions += deletions
        total_insertions += insertions
        num_truth_tokens += len(truth_sentence)
        num_hypothesis_tokens += len(hypothesis_sentence)

    # Word Error Rate: errors over the size of the aligned ground truth.
    num_errors = float(total_substitutions + total_deletions + total_insertions)
    wer = num_errors / float(total_hits + total_substitutions + total_deletions)

    # Match Error Rate: errors over all aligned positions including insertions.
    mer = num_errors / float(
        total_hits + total_substitutions + total_deletions + total_insertions
    )

    # Word Information Preserved and Word Information Lost.
    if num_hypothesis_tokens >= 1:
        wip = (float(total_hits) / num_truth_tokens) * (
            float(total_hits) / num_hypothesis_tokens
        )
    else:
        wip = 0
    wil = 1 - wip

    return {
        "wer": wer,
        "mer": mer,
        "wil": wil,
        "wip": wip,
        "hits": total_hits,
        "substitutions": total_substitutions,
        "deletions": total_deletions,
        "insertions": total_insertions,
    }
def compute_measures(
    truth: Union[str, List[str]],
    hypothesis: Union[str, List[str]],
    truth_transform: Union[tr.Compose, tr.AbstractTransform] = _default_transform,
    hypothesis_transform: Union[tr.Compose, tr.AbstractTransform] = _default_transform,
    **kwargs
) -> Mapping[str, float]:
    """
    Calculate error measures between a set of ground-truth sentences and a set
    of hypothesis sentences.

    A string input is treated as a single sentence; a list of strings is
    treated as multiple sentences. Words within a sentence are separated by one
    or more spaces, and no terminating token (such as a `.`) is expected — if
    the ASR emits such tokens, filter them out first.

    The optional transform arguments pre-process the ground truth and
    hypothesis input respectively. Any transform should ALWAYS include
    `SentencesToListOfWords`, as that is the expected input.

    :param truth: the ground-truth sentence(s) as a string or list of strings
    :param hypothesis: the hypothesis sentence(s) as a string or list of strings
    :param truth_transform: the transformation to apply on the truths input
    :param hypothesis_transform: the transformation to apply on the hypothesis input
    :return: a dict with WER, MER, WIP and WIL measures as floating point numbers
    """
    # Support the deprecated keyword arguments of the old API.
    if "standardize" in kwargs:
        truth = _standardize_transform(truth)
        hypothesis = _standardize_transform(hypothesis)
    if "words_to_filter" in kwargs:
        word_filter = tr.RemoveSpecificWords(kwargs["words_to_filter"])
        truth = word_filter(truth)
        hypothesis = word_filter(hypothesis)

    # Apply the configured transforms to both inputs.
    truth, hypothesis = _preprocess(
        truth, hypothesis, truth_transform, hypothesis_transform
    )

    # Count alignment operations: hits, substitutions, deletions, insertions.
    hits, substitutions, deletions, insertions = _get_operation_counts(
        truth, hypothesis
    )

    # Word Error Rate: errors over the size of the aligned ground truth.
    num_errors = float(substitutions + deletions + insertions)
    wer = num_errors / float(hits + substitutions + deletions)

    # Match Error Rate: errors over all aligned positions including insertions.
    mer = num_errors / float(hits + substitutions + deletions + insertions)

    # Word Information Preserved and Word Information Lost.
    if hypothesis:
        wip = (float(hits) / len(truth)) * (float(hits) / len(hypothesis))
    else:
        wip = 0
    wil = 1 - wip

    return {
        "wer": wer,
        "mer": mer,
        "wil": wil,
        "wip": wip,
    }