@classmethod
def normalize_tokenizations(cls, tokenizer, space_tokenization, target_tokenization):
    """See tokenization_normalization.py for details"""
    # SentencePiece variant. The target tokenizer is uncased, so lowercase
    # the space tokenization to match before aligning.
    space_tokenization = [token.lower() for token in space_tokenization]
    modified_space_tokenization = bow_tag_tokens(space_tokenization)
    modified_target_tokenization = process_sentencepiece_tokens(target_tokenization)
    return modified_space_tokenization, modified_target_tokenization
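
# Hedged sketch of the helpers used above. bow_tag_tokens and
# process_sentencepiece_tokens are defined in tokenization_normalization.py;
# the hypothetical _example_* versions below only illustrate their assumed
# behavior: tag every space-delimited token with a beginning-of-word marker,
# and rewrite SentencePiece's "▁" word-boundary prefix into the same marker
# so the two tokenizations can be aligned token-for-token.
def _example_bow_tag_tokens(tokens, bow_tag="<w>"):
    # Assumed behavior: every space-delimited token starts a word.
    return [bow_tag + token for token in tokens]


def _example_process_sentencepiece_tokens(tokens, bow_tag="<w>"):
    # Assumed behavior: "▁"-prefixed pieces start a word; other pieces are
    # word-internal continuations and are left untouched.
    return [bow_tag + token[1:] if token.startswith("▁") else token for token in tokens]


# Example:
#   _example_bow_tag_tokens(["good", "morning"])
#     -> ["<w>good", "<w>morning"]
#   _example_process_sentencepiece_tokens(["▁good", "▁morn", "ing"])
#     -> ["<w>good", "<w>morn", "ing"]
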
@classmethod
def normalize_tokenizations(cls, tokenizer, space_tokenization, target_tokenization):
    """See tokenization_normalization.py for details"""
    # Byte-level BPE variant.
    modified_space_tokenization = bow_tag_tokens(space_tokenization)
    # Byte-level BPE marks word boundaries with "Ġ" only after the first
    # token, so tag the first target token explicitly before processing.
    modified_target_tokenization = ["Ġ" + target_tokenization[0]] + target_tokenization[1:]
    modified_target_tokenization = process_bytebpe_tokens(modified_target_tokenization)
    return modified_space_tokenization, modified_target_tokenization
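
# Hedged sketch of process_bytebpe_tokens (defined in
# tokenization_normalization.py); the hypothetical _example_ version below
# only illustrates its assumed behavior: map byte-level BPE's "Ġ"
# word-boundary prefix to the same beginning-of-word marker applied to the
# space tokenization. This is also why the method above prepends "Ġ" to the
# first target token, which byte-level BPE leaves unmarked.
def _example_process_bytebpe_tokens(tokens, bow_tag="<w>"):
    # Assumed behavior: "Ġ"-prefixed tokens start a word; other tokens are
    # word-internal continuations and are left untouched.
    return [bow_tag + token[1:] if token.startswith("Ġ") else token for token in tokens]


# Example:
#   _example_process_bytebpe_tokens(["Ġgood", "Ġmorn", "ing"])
#     -> ["<w>good", "<w>morn", "ing"]
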
@classmethod
def normalize_tokenizations(cls, tokenizer, space_tokenization, target_tokenization):
    """See tokenization_normalization.py for details"""
    # WordPiece variant. Lowercase the space tokenization only when the
    # wrapped tokenizer is uncased.
    if tokenizer.init_kwargs.get("do_lower_case", False):
        space_tokenization = [token.lower() for token in space_tokenization]
    modified_space_tokenization = bow_tag_tokens(space_tokenization)
    modified_target_tokenization = process_wordpiece_tokens(target_tokenization)
    return modified_space_tokenization, modified_target_tokenization
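
# Hedged sketch of process_wordpiece_tokens (defined in
# tokenization_normalization.py); the hypothetical _example_ version below
# only illustrates its assumed behavior. WordPiece marks word-internal
# continuations with a "##" prefix, so the assumed mapping tags unprefixed
# tokens as word starts and strips "##" from continuations.
def _example_process_wordpiece_tokens(tokens, bow_tag="<w>"):
    # Assumed behavior: unprefixed tokens start a word; "##"-prefixed tokens
    # are continuations and have the prefix stripped.
    return [token[2:] if token.startswith("##") else bow_tag + token for token in tokens]


# Example:
#   _example_process_wordpiece_tokens(["good", "morn", "##ing"])
#     -> ["<w>good", "<w>morn", "ing"]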