def test_default_label_indices_generator():
    """Check the label indices a default Token1DVectorizer reports for a generator.

    A random number of tokens (between 1 and 100 inclusive) is fed to
    ``valid_label_indices`` as a lazy generator rather than a list, and the
    result must equal ``[0, 1, ..., num_tokens - 1]``.
    """
    count = random.randint(1, 100)
    # Deliberately a generator expression: the input is consumed lazily and
    # has no len(), which is the case this test is named for.
    token_stream = (random_string() for _ in range(count))
    vectorizer = Token1DVectorizer()
    expected = list(range(count))
    assert vectorizer.valid_label_indices(token_stream) == expected
def __init__(self, nctx, chars_per_word):
    """Set up the 'x' and 'y' vectorizers and hand them to the parent initializer.

    :param nctx: Forwarded unchanged as the first argument of the parent
        ``__init__``; its meaning is defined there (presumably a context
        length -- confirm against the base class).
    :param chars_per_word: Passed to ``Char2DVectorizer`` as ``mxwlen`` and
        stored on the instance as ``self.chars_per_word``.
    """
    # 'y' tokens go through baseline.lowercase; 'x' is vectorized per character.
    label_vectorizer = Token1DVectorizer(transform_fn=baseline.lowercase)
    char_vectorizer = Char2DVectorizer(mxwlen=chars_per_word)
    vectorizers = {'x': char_vectorizer, 'y': label_vectorizer}
    super().__init__(nctx, vectorizers)
    self.chars_per_word = chars_per_word