def encode(model, output_type, n_threads, bos, eos, reverse, stream, dropout_prob):
    """Encode text to ids or subwords."""
    # Details live in comments rather than the docstring, which may double as
    # CLI help text (TODO confirm).
    #
    # `n_threads` values below -1 and the value 0 are rejected with ValueError
    # before the model is touched. Everything else is forwarded unchanged:
    # `model` and `n_threads` to `yttmc.BPE`, the remaining options to that
    # object's `encode_cli` method.
    bad_thread_count = n_threads < -1 or n_threads == 0
    if bad_thread_count:
        template = 'Invalid value for "--n_threads": must be -1 or positive integer, not "%d"'
        raise ValueError(template % n_threads)

    tokenizer = yttmc.BPE(model, n_threads)
    tokenizer.encode_cli(output_type, stream, bos, eos, reverse, dropout_prob)
def encode(model, output_type, n_threads, bos, eos, reverse, stream):
    """Encode text to ids or subwords."""
    # Details live in comments rather than the docstring, which may double as
    # CLI help text (TODO confirm).
    #
    # `output_type` is compared case-insensitively and must be "id" or
    # "subword"; `n_threads` values below -1 and the value 0 are rejected.
    # Both failures raise ValueError before the model is touched. Everything
    # else is forwarded unchanged: `model` and `n_threads` to `yttmc.BPE`, the
    # remaining options to that object's `encode_cli` method.
    output_type = output_type.lower()
    if output_type not in ("id", "subword"):
        # %s, not %d: output_type is a string, and %d would raise TypeError
        # while building the message, masking the intended ValueError.
        raise ValueError(
            'Invalid value for "--output_type": must be equal to "id" or "subword", not "%s".'
            % output_type
        )
    if n_threads < -1 or n_threads == 0:
        raise ValueError(
            'Invalid value for "--n_threads": must be -1 or positive integer, not "%d"'
            % n_threads
        )

    bpe = yttmc.BPE(model, n_threads)
    bpe.encode_cli(output_type, stream, bos, eos, reverse)
def __init__(self, model: str, n_threads: int = -1) -> None:
    """Create the underlying ``_youtokentome_cython.BPE`` object.

    Args:
        model: Passed to the wrapped constructor as ``model_path``.
        n_threads: Passed to the wrapped constructor as ``n_threads``;
            defaults to -1.
    """
    # Keep the constructor arguments on the instance. `__setstate__` rebuilds
    # the wrapped object from a state mapping holding exactly "model" and
    # "n_threads", so these values need to be recoverable from the instance
    # (presumably by a matching `__getstate__` — not visible here, confirm).
    self.model = model
    self.n_threads = n_threads
    self.bpe_cython = _youtokentome_cython.BPE(model_path=model, n_threads=n_threads)
def vocab(model, verbose):
    """Print list of learned subwords."""
    # Details live in comments rather than the docstring, which may double as
    # CLI help text (TODO confirm).
    #
    # Builds a `yttmc.BPE` for `model` (default thread setting) and delegates
    # to its `vocab_cli` method, forwarding `verbose` unchanged.
    yttmc.BPE(model).vocab_cli(verbose)
def decode(model):
    """Decode ids to text."""
    # Details live in comments rather than the docstring, which may double as
    # CLI help text (TODO confirm).
    #
    # Builds a `yttmc.BPE` for `model` and delegates to its `decode_cli`
    # method, which takes no arguments here.
    yttmc.BPE(model).decode_cli()
def decode(model, ignore_ids):
    """Decode ids to text."""
    # Details live in comments rather than the docstring, which may double as
    # CLI help text (TODO confirm).
    #
    # Builds a `yttmc.BPE` for `model` and delegates to its `decode_cli`
    # method, forwarding `ignore_ids` unchanged.
    tokenizer = yttmc.BPE(model)
    tokenizer.decode_cli(ignore_ids)
def __setstate__(self, dict):
    """Restore the instance from a state mapping.

    Reads the ``"model"`` and ``"n_threads"`` entries of ``dict``, stores
    them on the instance under the same names, and rebuilds ``bpe_cython``
    from them.
    """
    # The parameter name shadows the builtin `dict`; it is left as-is so the
    # method signature stays unchanged.
    for key in ("model", "n_threads"):
        setattr(self, key, dict[key])
    self.bpe_cython = _youtokentome_cython.BPE(
        model_path=self.model, n_threads=self.n_threads
    )