def test_compose():
  """Checks that utils.compose chains its arguments right-to-left.

  The expected values below imply that the rightmost function is applied to
  the input first and the leftmost one last.
  """
  grid = np.arange(6).reshape(2, 3)  # [[0, 1, 2], [3, 4, 5]]

  # Each entry pairs the functions handed to compose with the scalar the
  # resulting callable is expected to pull out of `grid`.
  selector_cases = (
      ((utils.fst, utils.fst), 0),
      ((utils.snd, utils.fst), 1),
      ((utils.fst, utils.snd), 3),
      ((utils.snd, utils.snd), 4),
  )
  for funcs, wanted in selector_cases:
    assert utils.compose(*funcs)(grid) == wanted

  # Three-stage chain: cube (-8), absolute value (8), then stringify ('8').
  cube = lambda x: x**3
  assert utils.compose(str, np.abs, cube)(-2) == '8'
def tokenize_fun(tokenizer):
    """Build the standard text-processing callable around `tokenizer`.

    Args:
        tokenizer: Object exposing a `tokenize` method; it forms the last
            stage of the pipeline.

    Returns:
        The callable produced by `utils.compose` from the three stages.
        Going by test_compose, compose applies its arguments right-to-left,
        so input is case-folded first, then whitespace-split, then passed
        to `tokenizer.tokenize`.
    """
    whitespace_splitter = text.WhitespaceTokenizer()
    # Listed outermost-first, exactly as utils.compose expects them.
    stages = (
        tokenizer.tokenize,
        whitespace_splitter.tokenize,
        text.case_fold_utf8,
    )
    return utils.compose(*stages)
# Example no. 3
0
def tokenize_w_punctuation(tokenizer):
    """Build a text-processing callable that also splits off punctuation.

    Args:
        tokenizer: Object exposing a `tokenize` method; it forms the last
            stage of the pipeline.

    Returns:
        The callable produced by `utils.compose` from the four stages.
        Going by test_compose, compose applies its arguments right-to-left,
        so input is case-folded first, then run through
        `tensor_punctuation_separator`, then whitespace-split, and finally
        passed to `tokenizer.tokenize`.
    """
    whitespace_splitter = text.WhitespaceTokenizer()
    # Listed outermost-first, exactly as utils.compose expects them.
    stages = (
        tokenizer.tokenize,
        whitespace_splitter.tokenize,
        tensor_punctuation_separator,
        text.case_fold_utf8,
    )
    return utils.compose(*stages)