# Needs module-level imports: from allennlp.data import Token, Vocabulary
# and from allennlp.data.token_indexers import TokenCharactersIndexer.
def test_min_padding_length(self):
    sentence = "AllenNLP is awesome ."
    tokens = [Token(token) for token in sentence.split(" ")]
    vocab = Vocabulary()
    vocab.add_token_to_namespace("A", namespace="characters")  # 2
    vocab.add_token_to_namespace("l", namespace="characters")  # 3
    vocab.add_token_to_namespace("e", namespace="characters")  # 4
    vocab.add_token_to_namespace("n", namespace="characters")  # 5
    vocab.add_token_to_namespace("N", namespace="characters")  # 6
    vocab.add_token_to_namespace("L", namespace="characters")  # 7
    vocab.add_token_to_namespace("P", namespace="characters")  # 8
    vocab.add_token_to_namespace("i", namespace="characters")  # 9
    vocab.add_token_to_namespace("s", namespace="characters")  # 10
    vocab.add_token_to_namespace("a", namespace="characters")  # 11
    vocab.add_token_to_namespace("w", namespace="characters")  # 12
    vocab.add_token_to_namespace("o", namespace="characters")  # 13
    vocab.add_token_to_namespace("m", namespace="characters")  # 14
    vocab.add_token_to_namespace(".", namespace="characters")  # 15
    indexer = TokenCharactersIndexer("characters", min_padding_length=10)
    indices = indexer.tokens_to_indices(tokens, vocab, "char")
    # Collect the maximum padding length reported for any token; with
    # min_padding_length=10 this is 10, even though the longest token
    # ("AllenNLP") is only 8 characters long.
    key_padding_lengths = "num_token_characters"
    value_padding_lengths = 0
    for token in indices["char"]:
        item = indexer.get_padding_lengths(token)
        value = item.values()
        value_padding_lengths = max(value_padding_lengths, max(value))
    padded = indexer.as_padded_tensor(
        indices,
        {"char": len(indices["char"])},
        {key_padding_lengths: value_padding_lengths},
    )
    assert padded["char"].tolist() == [
        [2, 3, 3, 4, 5, 6, 7, 8, 0, 0],
        [9, 10, 0, 0, 0, 0, 0, 0, 0, 0],
        [11, 12, 4, 10, 13, 14, 4, 0, 0, 0],
        [15, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    ]
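
# As the asserts above and below show, `as_padded_tensor` pads in two
# directions: each character-id list is right-padded with zeros up to
# `num_token_characters`, and all-zero rows are appended until
# `desired_num_tokens` is reached. The next test checks this directly
# on raw index lists, bypassing `tokens_to_indices`.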
def test_as_array_produces_token_sequence(self):
    indexer = TokenCharactersIndexer("characters", min_padding_length=1)
    padded_tokens = indexer.as_padded_tensor(
        {"k": [[1, 2, 3, 4, 5], [1, 2, 3], [1]]},
        desired_num_tokens={"k": 4},
        padding_lengths={"num_token_characters": 10},
    )
    assert padded_tokens["k"].tolist() == [
        [1, 2, 3, 4, 5, 0, 0, 0, 0, 0],
        [1, 2, 3, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    ]