Example #1
    def _get_whole_word_mask(self):
        # build a per-vocabulary-entry mask marking which symbols begin a word
        if self.args.mask_whole_words:
            bpe = encoders.build_bpe(self.args)
            if bpe is not None:

                def is_beginning_of_word(i):
                    if i < self.source_dictionary.nspecial:
                        # special elements are always considered beginnings
                        return True
                    tok = self.source_dictionary[i]
                    if tok.startswith("madeupword"):
                        return True
                    try:
                        return bpe.is_beginning_of_word(tok)
                    except ValueError:
                        return True

                mask_whole_words = torch.ByteTensor(
                    list(
                        map(is_beginning_of_word,
                            range(len(self.source_dictionary)))))
        else:
            mask_whole_words = None
        return mask_whole_words
Example #2
    def __init__(self, cfg, task, models):
        super().__init__()
        self.cfg = cfg
        self.task = task
        self.models = nn.ModuleList(models)
        self.src_dict = task.source_dictionary
        self.tgt_dict = task.target_dictionary

        # optimize model for generation
        for model in self.models:
            model.prepare_for_inference_(cfg)

        # Load alignment dictionary for unknown word replacement
        # (None if no unknown word replacement, empty if no path to align dictionary)
        self.align_dict = utils.load_align_dict(cfg.generation.replace_unk)

        self.tokenizer = encoders.build_tokenizer(cfg.tokenizer)
        self.bpe = encoders.build_bpe(cfg.bpe)

        self.max_positions = utils.resolve_max_positions(
            self.task.max_positions(), *[model.max_positions() for model in models]
        )

        # this is useful for determining the device
        self.register_buffer("_float_tensor", torch.tensor([0], dtype=torch.float))
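
Both encoders built above come from fairseq's registry, and either can be None when the corresponding option is unset. Below is a minimal sketch of exercising the BPE side on its own; it assumes the "gpt2" BPE type (whose default vocabulary files fairseq downloads on first use), and the encode/decode calls are the wrapper's string-to-string methods, not anything shown in the snippet:

    import argparse

    from fairseq.data import encoders

    # assumption: "gpt2" is the registered BPE name; its defaults fetch encoder.json/vocab.bpe
    bpe = encoders.build_bpe(argparse.Namespace(bpe="gpt2"))

    raw = "Hello world!"
    bpe_tokens = bpe.encode(raw)       # raw text -> space-separated BPE token ids
    restored = bpe.decode(bpe_tokens)  # inverse mapping back to the raw text
    assert restored == raw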
Example #3
    def __init__(self, cfg, task, model):
        super().__init__()
        self.cfg = cfg
        self.task = task
        self.model = model

        self.bpe = encoders.build_bpe(cfg.bpe)

        # this is useful for determining the device
        self.register_buffer("_float_tensor",
                             torch.tensor([0], dtype=torch.float))
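
The _float_tensor buffer registered in both interfaces above is a general PyTorch idiom rather than anything BPE-specific: buffers move together with the module on .to() / .cuda(), so reading the buffer's device later tells you where the wrapped model currently lives. A minimal self-contained sketch of the idiom (the class name and the device property are illustrative, not taken from the snippets):

    import torch
    import torch.nn as nn

    class DeviceAware(nn.Module):  # hypothetical minimal example of the idiom
        def __init__(self):
            super().__init__()
            # buffers follow the module when it is moved between devices
            self.register_buffer("_float_tensor", torch.tensor([0], dtype=torch.float))

        @property
        def device(self):
            return self._float_tensor.device

    m = DeviceAware()
    print(m.device)  # cpu; becomes cuda:0 after m.cuda() if a GPU is available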
Example #4
def get_whole_word_mask(args, dictionary):
    bpe = encoders.build_bpe(args)
    if bpe is not None:

        def is_beginning_of_word(i):
            if i < dictionary.nspecial:
                # special elements are always considered beginnings
                return True
            tok = dictionary[i]
            if tok.startswith("madeupword"):
                return True
            try:
                return bpe.is_beginning_of_word(tok)
            except ValueError:
                return True

        mask_whole_words = torch.ByteTensor(
            list(map(is_beginning_of_word, range(len(dictionary)))))
        return mask_whole_words
    return None
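
The returned ByteTensor has one entry per dictionary index, set to 1 where that symbol begins a word and 0 where it continues one, so a per-token whole-word mask for a binarized sentence is obtained by indexing it with the token ids. A hedged usage sketch, assuming args and dictionary have already been set up by a fairseq task (the indices 10-12 are arbitrary stand-ins):

    import torch

    mask_whole_words = get_whole_word_mask(args, dictionary)
    if mask_whole_words is not None:
        # stand-in for a binarized sentence: a LongTensor of dictionary indices
        sample = torch.LongTensor([dictionary.bos(), 10, 11, 12, dictionary.eos()])
        word_begins = mask_whole_words[sample]  # 1 marks tokens that start a new word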
Example #5
    def build_bpe(self, args):
        """Build the BPE tokenizer for this task."""
        return encoders.build_bpe(args)
Example #6
    def __init__(self, bpe, **kwargs):
        super().__init__()
        # forward BPE-specific options straight into the Namespace read by build_bpe
        args = argparse.Namespace(bpe=bpe, **kwargs)
        self.bpe = encoders.build_bpe(args)
        assert self.bpe is not None
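
Because the constructor forwards its keyword arguments into the Namespace read by encoders.build_bpe, BPE-specific options can be passed as plain kwargs. In the hedged sketch below, BPEWrapper is a placeholder for whatever class this __init__ belongs to, and sentencepiece_model is assumed to be the option fairseq's sentencepiece wrapper expects:

    # hypothetical usage; BPEWrapper stands in for the class shown above
    wrapper = BPEWrapper(
        bpe="sentencepiece",
        sentencepiece_model="/path/to/spm.model",  # placeholder path
    )
    pieces = wrapper.bpe.encode("Hello world")  # space-separated sentencepiece pieces
    print(wrapper.bpe.decode(pieces))           # back to "Hello world"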
Example #7
    def build_bpe(self, args):
        logger.info(f"tokenizer: {self.data_cfg.bpe_tokenizer}")
        return encoders.build_bpe(Namespace(**self.data_cfg.bpe_tokenizer))
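
Here the BPE options come from the dataset configuration rather than from command-line arguments: data_cfg.bpe_tokenizer is a plain mapping whose keys become Namespace attributes. A hedged sketch of what such a mapping might contain for a sentencepiece model (the key names and model filename are illustrative assumptions, not taken from the snippet):

    from argparse import Namespace

    from fairseq.data import encoders

    # illustrative stand-in for self.data_cfg.bpe_tokenizer, normally parsed
    # from the dataset's config file
    bpe_tokenizer = {
        "bpe": "sentencepiece",
        "sentencepiece_model": "spm_unigram10000.model",
    }
    bpe = encoders.build_bpe(Namespace(**bpe_tokenizer))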