Python CharTknzr.norm Examples

Programming Language: Python

Namespace/Package Name: lmp.tknzr._char

Class/Type: CharTknzr

Method/Function: norm

Examples at hotexamples.com: 5

Python CharTknzr.norm – 5 examples found. These are the top-rated real-world Python examples of lmp.tknzr._char.CharTknzr.norm, extracted from open source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

CharTknzr(15)

build_vocab(11)

norm(5)

add_CLI_args(1)

batch_dec(1)

batch_enc(1)

dec(1)

dtknz(1)

enc(1)

tknz(1)

Example #1

Show file

def test_no_limit_build() -> None:
    """Every token must end up in the vocabulary when ``max_vocab == -1``.

    Builds a vocabulary from one single-character text per code point in
    U+4E00..U+9FFF and checks that no normalized character was dropped.
    """
    tknzr = CharTknzr(is_uncased=False, max_vocab=-1, min_count=0)

    # One text per code point, both range ends inclusive.
    first_cp, last_cp = 0x4E00, 0x9FFF
    cjk_chars = [chr(cp) for cp in range(first_cp, last_cp + 1)]

    # Normalize up front so the expectations are phrased in terms of what the
    # tokenizer actually stores, not the raw input characters.
    normed_chars = [tknzr.norm(ch) for ch in cjk_chars]
    tknzr.build_vocab(cjk_chars)

    # The 4 extra entries are presumably the tokenizer's special tokens --
    # TODO confirm against CharTknzr.
    expected_size = len(set(normed_chars)) + 4
    assert tknzr.vocab_size == expected_size
    assert all(ch in tknzr.tk2id for ch in normed_chars)

Example #2

Show file

File: test_norm.py Project: ProFatXuanAll/language-model-playground

def test_uncased(tknzr: CharTknzr, uncased_txt: Dict[str, str]) -> None:
  """Output text is lower-cased only when ``is_uncased == True``.

  Parameters
  ----------
  tknzr
    Tokenizer under test; its ``is_uncased`` flag selects the expectation.
  uncased_txt
    Mapping with an ``'input'`` text and the ``'output'`` text expected from
    an uncased tokenizer.
  """
  raw = uncased_txt['input']
  # A cased tokenizer is expected to hand the input back unchanged.
  expected = uncased_txt['output'] if tknzr.is_uncased else raw
  assert tknzr.norm(raw) == expected

Example #3

Show file

File: test_norm.py Project: ProFatXuanAll/language-model-playground

def test_collapse_whitespace(tknzr: CharTknzr, ws_collapse_txt: Dict[str, str]) -> None:
  """Consecutive whitespaces must be collapsed by ``norm``.

  ``ws_collapse_txt`` supplies the ``'input'`` text and the ``'output'`` text
  that normalization is expected to produce from it.
  """
  raw, expected = ws_collapse_txt['input'], ws_collapse_txt['output']
  normalized = tknzr.norm(raw)
  assert normalized == expected

Example #4

Show file

File: test_norm.py Project: ProFatXuanAll/language-model-playground

def test_strip_whitespace(tknzr: CharTknzr, ws_strip_txt: Dict[str, str]) -> None:
  """Text must come out of ``norm`` stripped.

  ``ws_strip_txt`` supplies the ``'input'`` text and the ``'output'`` text
  that normalization is expected to produce from it.
  """
  raw, expected = ws_strip_txt['input'], ws_strip_txt['output']
  normalized = tknzr.norm(raw)
  assert normalized == expected

Example #5

Show file

File: test_norm.py Project: ProFatXuanAll/language-model-playground

def test_nfkc(nfkc_txt: Dict[str, str], tknzr: CharTknzr) -> None:
  """Text must be NFKC-normalized by ``norm``.

  ``nfkc_txt`` supplies the ``'input'`` text and the ``'output'`` text that
  normalization is expected to produce from it.
  """
  raw, expected = nfkc_txt['input'], nfkc_txt['output']
  normalized = tknzr.norm(raw)
  assert normalized == expected