def make_dictionary():
    """Build an ``AsrDictionary`` of lowercase ASCII letters plus ``<space>``.

    Symbols are added in the order ``a`` .. ``z`` followed by ``"<space>"``.
    The returned dictionary also carries a ``space_index`` attribute holding
    the index of ``"<space>"`` in ``indices`` (``-1`` if it is not present).
    """
    dictionary = AsrDictionary()
    # One pass over the letters and then the "<space>" symbol, in that order.
    for symbol in (*string.ascii_lowercase, "<space>"):
        dictionary.add_symbol(symbol)
    # padding_factor=1: don't add extra padding symbols.
    dictionary.finalize(padding_factor=1)
    dictionary.space_index = dictionary.indices.get("<space>", -1)
    return dictionary
def make_dictionary(vocab, non_lang_syms=None):
    """Build an ``AsrDictionary`` from a vocabulary and optional extra symbols.

    Symbols are added in this order: every token of ``vocab``, then
    ``'<space>'``, then every token of ``non_lang_syms``.

    Args:
        vocab: list of tokens to add first.
        non_lang_syms: optional list of additional symbols added after
            ``'<space>'``. ``None`` (the default) means no additional symbols.

    Returns:
        The finalized dictionary, with a ``space_index`` attribute set to the
        index of ``'<space>'`` in ``indices`` (``-1`` if it is not present).

    Raises:
        AssertionError: if ``vocab`` or ``non_lang_syms`` is not a list.
    """
    # Use a None sentinel instead of a mutable ``[]`` default so that calls
    # never share one list object.
    if non_lang_syms is None:
        non_lang_syms = []
    assert isinstance(vocab, list) and isinstance(non_lang_syms, list)

    d = AsrDictionary()
    for token in vocab:
        d.add_symbol(token)
    d.add_symbol('<space>')
    for token in non_lang_syms:
        d.add_symbol(token)
    d.finalize(padding_factor=1)  # don't add extra padding symbols
    d.space_index = d.indices.get('<space>', -1)
    return d
def make_dictionary(vocab, non_lang_syms=None):
    """Build an ``AsrDictionary`` with a ``characters_asr`` BPE attached.

    The dictionary's ``non_lang_syms`` attribute is set, ``build_bpe`` is
    called with ``bpe="characters_asr"``, and then symbols are added in this
    order: every token of ``vocab``, then ``"<space>"``, then every token of
    ``non_lang_syms``.

    Args:
        vocab: list of tokens to add first.
        non_lang_syms: optional list of additional symbols. It is stored on
            the dictionary as ``non_lang_syms`` (by reference when supplied
            by the caller) and its tokens are added after ``"<space>"``.
            ``None`` (the default) means a fresh empty list.

    Returns:
        The finalized dictionary, with a ``space_index`` attribute set to the
        index of ``"<space>"`` in ``indices`` (``-1`` if it is not present).

    Raises:
        AssertionError: if ``vocab`` or ``non_lang_syms`` is not a list.
    """
    # A mutable ``[]`` default would be stored on every returned dictionary,
    # so mutating ``d.non_lang_syms`` on one dictionary would leak into later
    # calls. Create a fresh list per call instead.
    if non_lang_syms is None:
        non_lang_syms = []
    assert isinstance(vocab, list) and isinstance(non_lang_syms, list)

    d = AsrDictionary()
    # Assigned before build_bpe, as in the original ordering.
    # NOTE(review): presumably build_bpe reads this attribute; confirm.
    d.non_lang_syms = non_lang_syms
    args = Namespace(bpe="characters_asr")
    d.build_bpe(args)

    for token in vocab:
        d.add_symbol(token)
    d.add_symbol("<space>")
    for token in non_lang_syms:
        d.add_symbol(token)
    d.finalize(padding_factor=1)  # don't add extra padding symbols
    d.space_index = d.indices.get("<space>", -1)
    return d