Ejemplo n.º 1
0
def encode(model, output_type, n_threads, bos, eos, reverse, stream,
           dropout_prob):
    """Encode text to ids or subwords.

    Validates ``n_threads`` and then delegates to ``yttmc.BPE.encode_cli``.
    ``output_type``, ``stream``, ``bos``, ``eos``, ``reverse`` and
    ``dropout_prob`` are forwarded unchanged to ``encode_cli``.

    Raises:
        ValueError: if ``n_threads`` is 0 or smaller than -1.
    """
    # Only -1 and strictly positive thread counts are accepted.
    if n_threads == 0 or n_threads < -1:
        message = (
            'Invalid value for "--n_threads": must be -1 or positive integer, not "%d"'
            % n_threads)
        raise ValueError(message)

    tokenizer = yttmc.BPE(model, n_threads)
    tokenizer.encode_cli(output_type, stream, bos, eos, reverse, dropout_prob)
Ejemplo n.º 2
0
def encode(model, output_type, n_threads, bos, eos, reverse, stream):
    """Encode text to ids or subwords.

    Normalizes ``output_type`` to lower case, validates it together with
    ``n_threads``, and then delegates to ``yttmc.BPE.encode_cli``.
    ``stream``, ``bos``, ``eos`` and ``reverse`` are forwarded unchanged to
    ``encode_cli``.

    Raises:
        ValueError: if ``output_type`` is neither "id" nor "subword"
            (case-insensitive), or if ``n_threads`` is 0 or smaller than -1.
    """
    output_type = output_type.lower()
    if output_type not in ("id", "subword"):
        # ``output_type`` is a string, so it must be formatted with %s;
        # %d would raise TypeError and mask the intended ValueError.
        raise ValueError(
            'Invalid value for "--output_type": must be equal to "id" or "subword", not "%s".'
            % output_type)
    if n_threads < -1 or n_threads == 0:
        raise ValueError(
            'Invalid value for "--n_threads": must be -1 or positive integer, not "%d"'
            % n_threads)

    bpe = yttmc.BPE(model, n_threads)
    bpe.encode_cli(output_type, stream, bos, eos, reverse)
Ejemplo n.º 3
0
 def __init__(self, model: str, n_threads: int = -1):
     """Create the underlying Cython BPE object.

     ``model`` is passed as ``model_path`` and ``n_threads`` as
     ``n_threads`` to ``_youtokentome_cython.BPE``; the result is kept
     on ``self.bpe_cython``.
     """
     backend = _youtokentome_cython.BPE(n_threads=n_threads,
                                        model_path=model)
     self.bpe_cython = backend
Ejemplo n.º 4
0
def vocab(model, verbose):
    """Print list of learned subwords.

    Builds a ``yttmc.BPE`` from ``model`` and calls its ``vocab_cli``
    with ``verbose``.
    """
    yttmc.BPE(model).vocab_cli(verbose)
Ejemplo n.º 5
0
def decode(model):
    """Decode ids to text.

    Builds a ``yttmc.BPE`` from ``model`` and calls its ``decode_cli``.
    """
    yttmc.BPE(model).decode_cli()
Ejemplo n.º 6
0
def decode(model, ignore_ids):
    """Decode ids to text.

    Builds a ``yttmc.BPE`` from ``model`` and calls its ``decode_cli``
    with ``ignore_ids``.
    """
    yttmc.BPE(model).decode_cli(ignore_ids)
Ejemplo n.º 7
0
    def __setstate__(self, dict):
        """Restore the instance from a state mapping.

        Reads the "model" and "n_threads" entries of the given state,
        stores them on the instance, and rebuilds ``self.bpe_cython``
        from them via ``_youtokentome_cython.BPE``.
        """
        # NOTE: the parameter name shadows the builtin ``dict``; it is kept
        # as-is so the method signature stays unchanged.
        state = dict
        self.model = state["model"]
        self.n_threads = state["n_threads"]

        # Recreate the Cython object from the restored attributes.
        rebuilt = _youtokentome_cython.BPE(
            model_path=self.model,
            n_threads=self.n_threads,
        )
        self.bpe_cython = rebuilt