Ejemplo n.º 1
0
 def make_dictionary():
     """Build a TokenDictionary of the lowercase ASCII letters plus '<space>'.

     Symbols are added in alphabetical order, followed by the '<space>'
     symbol. The returned dictionary also carries a ``space_index``
     attribute: the index of '<space>' in ``indices``, or -1 if it is absent.
     """
     dictionary = TokenDictionary()
     # Letters first, then the space marker, so insertion order is preserved.
     for symbol in (*string.ascii_lowercase, '<space>'):
         dictionary.add_symbol(symbol)
     # padding_factor=1: don't add extra padding symbols
     dictionary.finalize(padding_factor=1)
     dictionary.space_index = dictionary.indices.get('<space>', -1)
     return dictionary
Ejemplo n.º 2
0
 def make_dictionary(vocab, non_lang_syms=None):
     """Build a TokenDictionary from a vocabulary and optional extra symbols.

     Symbols are added in this order: every token of ``vocab``, then
     '<space>', then every token of ``non_lang_syms``.

     Args:
         vocab: list of tokens to add first.
         non_lang_syms: optional list of additional symbols added after
             '<space>'. ``None`` (the default) means no additional symbols.

     Returns:
         The finalized dictionary, with a ``space_index`` attribute set to
         the index of '<space>' in ``indices`` (or -1 if it is absent).

     Raises:
         AssertionError: if ``vocab`` or ``non_lang_syms`` is not a list.
     """
     # Use a None sentinel instead of a shared mutable ``[]`` default.
     if non_lang_syms is None:
         non_lang_syms = []
     assert isinstance(vocab, list), 'vocab must be a list'
     assert isinstance(non_lang_syms, list), 'non_lang_syms must be a list'
     d = TokenDictionary()
     for token in [*vocab, '<space>', *non_lang_syms]:
         d.add_symbol(token)
     d.finalize(padding_factor=1)  # don't add extra padding symbols
     d.space_index = d.indices.get('<space>', -1)
     return d