Example 1
def _ter_update(
    preds: Union[str, Sequence[str]],
    target: Sequence[Union[str, Sequence[str]]],
    tokenizer: _TercomTokenizer,
    total_num_edits: Tensor,
    total_tgt_length: Tensor,
    sentence_ter: Optional[List[Tensor]] = None,
) -> Tuple[Tensor, Tensor, Optional[List[Tensor]]]:
    """Update TER statistics.

    Args:
        preds:
            An iterable of hypothesis corpus.
        target:
            An iterable of iterables of reference corpus.
        tokenizer:
            An instance of ``_TercomTokenizer`` handling sentence tokenization.
        total_num_edits:
            A total number of required edits to match hypothesis and reference sentences.
        total_tgt_length:
            A total average length of reference sentences.
        sentence_ter:
            An optional list to which sentence-level TER values are appended.

    Return:
        total_num_edits:
            A total number of required edits to match hypothesis and reference sentences.
        total_tgt_length:
            A total average length of reference sentences.
        sentence_ter:
            (Optionally) A list of sentence-level TER.

    Raises:
        ValueError:
            If length of ``preds`` and ``target`` differs.
    """
    target, preds = _validate_inputs(target, preds)

    for pred, tgt in zip(preds, target):
        tgt_words_: List[List[str]] = [_preprocess_sentence(_tgt, tokenizer).split() for _tgt in tgt]
        pred_words_: List[str] = _preprocess_sentence(pred, tokenizer).split()
        num_edits, tgt_length = _compute_sentence_statistics(pred_words_, tgt_words_)
        total_num_edits += num_edits
        total_tgt_length += tgt_length
        if sentence_ter is not None:
            sentence_ter.append(_compute_ter_score_from_statistics(num_edits, tgt_length).unsqueeze(0))
    return total_num_edits, total_tgt_length, sentence_ter
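A minimal usage sketch for ``_ter_update``: the import path below is an assumption about where these private helpers live in torchmetrics, and the sample sentences and zero-initialized running tensors are made up for illustration only.

# Hedged usage sketch: the import path is an assumption; only the call pattern is shown.
import torch
from torchmetrics.functional.text.ter import _TercomTokenizer, _ter_update

preds = ["the cat is on the mat"]
target = [["there is a cat on the mat", "a cat is on the mat"]]

tokenizer = _TercomTokenizer()          # default normalization settings (assumed)
total_num_edits = torch.tensor(0.0)     # running edit count across the corpus
total_tgt_length = torch.tensor(0.0)    # running reference length
sentence_ter = []                       # pass None to skip sentence-level scores

total_num_edits, total_tgt_length, sentence_ter = _ter_update(
    preds, target, tokenizer, total_num_edits, total_tgt_length, sentence_ter
)
# Corpus-level TER is the accumulated edits divided by the accumulated length.
corpus_ter = total_num_edits / total_tgt_length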
Example 2
def _preprocess_sentences(
    preds: Union[str, Sequence[str]],
    target: Sequence[Union[str, Sequence[str]]],
    language: Union[Literal["en"], Literal["ja"]],
) -> Tuple[Union[str, Sequence[str]], Sequence[Union[str, Sequence[str]]]]:
    """Preprocess strings according to language requirements.

    Args:
        preds: An iterable of hypothesis corpus.
        target: An iterable of iterables of reference corpus.
        language: Language used in sentences. Only English (``en``) and Japanese (``ja``) are supported for now. Defaults to ``en``.

    Return:
        Tuple of lists containing the cleaned strings for preds and target (in that order)

    Raises:
        ValueError: If a different language than ``'en'`` or ``'ja'`` is used
        ValueError: If length of target not equal to length of preds
        ValueError: If objects in reference and hypothesis corpus are not strings
    """
    # sanity checks
    target, preds = _validate_inputs(hypothesis_corpus=preds, reference_corpus=target)

    # preprocess string
    if language == "en":
        preprocess_function = _preprocess_en
    elif language == "ja":
        preprocess_function = _preprocess_ja
    else:
        raise ValueError(
            f"Expected argument `language` to either be `en` or `ja` but got {language}"
        )

    preds = [preprocess_function(pred) for pred in preds]
    target = [[preprocess_function(ref) for ref in reference] for reference in target]

    return preds, target
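A short illustration of calling ``_preprocess_sentences``. The import path is an assumption (in torchmetrics these helpers accompany the extended edit distance implementation), and the sample sentences are invented.

# Hedged usage sketch: the import path is an assumption.
from torchmetrics.functional.text.eed import _preprocess_sentences

preds = ["A quick brown fox jumped over the lazy dog."]
target = [["A fast brown fox jumps over the lazy dog."]]

# Returns the cleaned hypotheses first, then the nested list of cleaned references.
preds_clean, target_clean = _preprocess_sentences(preds, target, language="en")

# Any language other than "en" or "ja" raises a ValueError.
try:
    _preprocess_sentences(preds, target, language="de")
except ValueError as err:
    print(err)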
Example 3
def _chrf_score_update(
    preds: Union[str, Sequence[str]],
    target: Union[Sequence[str], Sequence[Sequence[str]]],
    total_preds_char_n_grams: Dict[int, Tensor],
    total_preds_word_n_grams: Dict[int, Tensor],
    total_target_char_n_grams: Dict[int, Tensor],
    total_target_word_n_grams: Dict[int, Tensor],
    total_matching_char_n_grams: Dict[int, Tensor],
    total_matching_word_n_grams: Dict[int, Tensor],
    n_char_order: int,
    n_word_order: int,
    n_order: float,
    beta: float,
    lowercase: bool,
    whitespace: bool,
    sentence_chrf_score: Optional[List[Tensor]] = None,
) -> Tuple[
    Dict[int, Tensor],
    Dict[int, Tensor],
    Dict[int, Tensor],
    Dict[int, Tensor],
    Dict[int, Tensor],
    Dict[int, Tensor],
    Optional[List[Tensor]],
]:
    """
    Args:
        preds: An iterable of hypothesis corpus.
        target: An iterable of iterables of reference corpus.
        total_preds_char_n_grams: A dictionary containing a total number of hypothesis character n-grams.
        total_preds_word_n_grams: A dictionary containing a total number of hypothesis word n-grams.
        total_target_char_n_grams: A dictionary containing a total number of reference character n-grams.
        total_target_word_n_grams: A dictionary containing a total number of reference word n-grams.
        total_matching_char_n_grams:
            A dictionary containing a total number of matching character n-grams between references and hypotheses.
        total_matching_word_n_grams:
            A dictionary containing a total number of matching word n-grams between references and hypotheses.
        n_char_order: A character n-gram order.
        n_word_order: A word n-gram order.
        n_order: Sum of character and word n-gram order.
        beta: A parameter determining an importance of recall w.r.t. precision. If `beta=1`, their importance is equal.
        lowercase: An indication whether to enable case-insensitivity.
        whitespace: An indication whether to keep whitespaces during character n-gram extraction.
        sentence_chrf_score: A list of sentence-level chrF/chrF++ scores.

    Return:
        total_preds_char_n_grams: An updated total number of hypothesis character n-grams.
        total_preds_word_n_grams: An updated total number of hypothesis word n-grams.
        total_target_char_n_grams: An updated total number of reference character n-grams.
        total_target_word_n_grams: An updated total number of reference word n-grams.
        total_matching_char_n_grams: An updated total number of matching character n-grams between references and hypotheses.
        total_matching_word_n_grams: An updated total number of matching word n-grams between references and hypotheses.
        sentence_chrf_score: (Optionally) A list of sentence-level chrF/chrF++ scores.

    Raises:
        ValueError:
            If length of ``preds`` and ``target`` differs.
    """
    target_corpus, preds = _validate_inputs(target, preds)

    for (pred, targets) in zip(preds, target_corpus):
        (
            pred_char_n_grams_counts,
            pred_word_n_grams_counts,
            pred_char_n_grams,
            pred_word_n_grams,
        ) = _get_n_grams_counts_and_total_ngrams(pred, n_char_order, n_word_order, lowercase, whitespace)
        total_preds_char_n_grams = _sum_over_dicts(total_preds_char_n_grams, pred_char_n_grams)
        total_preds_word_n_grams = _sum_over_dicts(total_preds_word_n_grams, pred_word_n_grams)

        (
            sentence_level_f_score,
            matching_char_n_grams,
            matching_word_n_grams,
            target_char_n_grams,
            target_word_n_grams,
        ) = _calculate_sentence_level_chrf_score(
            targets,  # type: ignore
            pred_char_n_grams_counts,
            pred_word_n_grams_counts,
            pred_char_n_grams,
            pred_word_n_grams,
            n_char_order,
            n_word_order,
            n_order,
            beta,
            lowercase,
            whitespace,
        )

        if sentence_chrf_score is not None:
            sentence_chrf_score.append(sentence_level_f_score.unsqueeze(0))

        total_target_char_n_grams = _sum_over_dicts(total_target_char_n_grams, target_char_n_grams)
        total_target_word_n_grams = _sum_over_dicts(total_target_word_n_grams, target_word_n_grams)
        total_matching_char_n_grams = _sum_over_dicts(total_matching_char_n_grams, matching_char_n_grams)
        total_matching_word_n_grams = _sum_over_dicts(total_matching_word_n_grams, matching_word_n_grams)

    return (
        total_preds_char_n_grams,
        total_preds_word_n_grams,
        total_target_char_n_grams,
        total_target_word_n_grams,
        total_matching_char_n_grams,
        total_matching_word_n_grams,
        sentence_chrf_score,
    )
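A sketch of how the running n-gram dictionaries might be initialized and fed to ``_chrf_score_update``. The per-order zero-tensor initialization and the import path are assumptions; the character order 6, word order 2, and ``beta=2`` values follow the commonly documented chrF++ defaults.

# Hedged usage sketch: import path and state initialization are assumptions.
import torch
from torchmetrics.functional.text.chrf import _chrf_score_update

n_char_order, n_word_order = 6, 2            # chrF++ defaults
n_order = float(n_char_order + n_word_order)


def _zero_dict(order: int) -> dict:
    """One zero tensor per n-gram order, keyed 1..order."""
    return {n: torch.tensor(0.0) for n in range(1, order + 1)}


total_preds_char = _zero_dict(n_char_order)
total_preds_word = _zero_dict(n_word_order)
total_target_char = _zero_dict(n_char_order)
total_target_word = _zero_dict(n_word_order)
total_matching_char = _zero_dict(n_char_order)
total_matching_word = _zero_dict(n_word_order)
sentence_scores = []                         # pass None to skip per-sentence scores

results = _chrf_score_update(
    ["the cat sat on the mat"],
    [["a cat sat on the mat"]],
    total_preds_char,
    total_preds_word,
    total_target_char,
    total_target_word,
    total_matching_char,
    total_matching_word,
    n_char_order,
    n_word_order,
    n_order,
    beta=2.0,
    lowercase=False,
    whitespace=False,
    sentence_chrf_score=sentence_scores,
)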