def test_count_vocab_items_case_insensitive(self):
    """Check character counts when the tokenizer lowercases its input.

    "Hello" and "hello" are both counted through an indexer whose
    character tokenizer is built with ``lowercase_characters=True``; the
    test expects "H" and "h" to be tallied under the single key "h".
    """
    lowercasing_tokenizer = CharacterTokenizer(lowercase_characters=True)
    char_indexer = TokenCharacterIndexer(
        "characters",
        lowercasing_tokenizer,
        min_padding_length=5,
    )
    # Nested defaultdict: namespace -> character -> occurrence count.
    vocab_counts = defaultdict(lambda: defaultdict(int))

    for word in ("Hello", "hello"):
        char_indexer.count_vocab_items(Token(word), vocab_counts)

    expected = {"h": 2, "e": 2, "l": 4, "o": 2}
    assert vocab_counts["characters"] == expected
def test_count_vocab_items_respect_casing(self):
    """Check character counts when no custom tokenizer is supplied.

    "Hello" and "hello" are both counted through an indexer constructed
    without an explicit character tokenizer; the test expects "H" and "h"
    to be tallied under separate keys.
    """
    char_indexer = TokenCharacterIndexer("characters", min_padding_length=5)
    # Nested defaultdict: namespace -> character -> occurrence count.
    vocab_counts = defaultdict(lambda: defaultdict(int))

    for word in ("Hello", "hello"):
        char_indexer.count_vocab_items(Token(word), vocab_counts)

    expected = {"h": 1, "H": 1, "e": 2, "l": 4, "o": 2}
    assert vocab_counts["characters"] == expected