コード例 #1
0
def test_tknz(parameters, test_input: str, expected: List[str]):
    r"""Check that ``CharTknzr.tknz`` produces the expected token list.

    Parameters
    ----------
    parameters
        Mapping providing the ``is_uncased``, ``max_vocab`` and ``min_count``
        values forwarded to the ``CharTknzr`` constructor.
    test_input: str
        Text handed to ``tknz``.
    expected: List[str]
        Token list the tokenizer output is compared against.
    """
    # Forward only the constructor options this test cares about.
    tknzr_kwargs = {
        'is_uncased': parameters['is_uncased'],
        'max_vocab': parameters['max_vocab'],
        'min_count': parameters['min_count'],
    }
    tokens = CharTknzr(**tknzr_kwargs).tknz(test_input)

    assert tokens == expected
コード例 #2
0
def test_char_tknzr(capsys, char_tknzr: CharTknzr, exp_name: str, seed: int) -> None:
  """Check that the tokenize script prints what ``char_tknzr.tknz`` returns.

  The script ``lmp.script.tknz_txt`` is run with the experiment name, the seed
  and a fixed sample text; the string form of ``char_tknzr.tknz`` applied to
  the same text must appear in the captured standard output.
  """
  txt = 'abc'
  cli_args = ['--exp_name', exp_name, '--seed', str(seed), '--txt', txt]

  lmp.script.tknz_txt.main(argv=cli_args)

  # Grab the script output before tokenizing directly, as the reference
  # tokenization must not contribute to the captured text.
  script_stdout = capsys.readouterr().out
  expected_repr = str(char_tknzr.tknz(txt=txt))

  assert expected_repr in script_stdout