def generate_symbol_tables(lexicon, n=3):
    """Build the word, phone and HMM-state symbol tables for a lexicon.

    Each table is seeded with '<eps>' before any lexicon entry is added.
    For every phone of every word, the phone itself is added to the phone
    table and the state names '<phone>_1' ... '<phone>_n' are added to the
    state table.

    Args:
        lexicon (dict): maps each word to an iterable of its phones
            (created from the parse_lexicon() function)
        n (int): number of states for each phone HMM

    Returns:
        word_table (fst.SymbolTable): table of words
        phone_table (fst.SymbolTable): table of phones
        state_table (fst.SymbolTable): table of HMM phone-state IDs
    """
    word_table = fst.SymbolTable()
    phone_table = fst.SymbolTable()
    state_table = fst.SymbolTable()

    # Seed every table with the epsilon symbol before anything else.
    for table in (state_table, phone_table, word_table):
        table.add_symbol('<eps>')

    for entry, pronunciation in lexicon.items():
        word_table.add_symbol(entry)
        for phone_symbol in pronunciation:
            phone_table.add_symbol(phone_symbol)
            # One state symbol per HMM state, numbered from 1 to n.
            for state_number in range(1, n + 1):
                state_table.add_symbol(f'{phone_symbol}_{state_number}')

    return word_table, phone_table, state_table
def generate_output_table(word_table, phone_table):
    """Combine the phone and word symbols into a single new symbol table.

    The symbol strings of `phone_table` are added first, followed by those
    of `word_table`. The IDs stored in the source tables are ignored; only
    the symbol strings are re-added to the new table.

    Args:
        word_table (fst.SymbolTable): table of words
        phone_table (fst.SymbolTable): table of phones

    Returns:
        output_table (fst.SymbolTable): table holding the phone symbols
            followed by the word symbols
    """
    output_table = fst.SymbolTable()
    # Phones go in before words, so the order of this tuple matters.
    for source_table in (phone_table, word_table):
        for _, symbol in source_table:
            output_table.add_symbol(symbol)
    return output_table
def generate_symbols_table(lexicon, n=3):
    """Build the word, phone and HMM-state symbol tables for a lexicon.

    Each lexicon value is treated as a collection of pronunciations, and
    each pronunciation as an iterable of phones. Every phone is added to
    the phone table, and its state names '<phone>_1' ... '<phone>_n' are
    added to the state table.

    Args:
        lexicon (dict): maps each word to an iterable of pronunciations,
            each pronunciation being an iterable of phones
            (created from the parse_lexicon() function)
        n (int): number of states for each phone HMM

    Returns:
        word_table (fst.SymbolTable): table of words
        phone_table (fst.SymbolTable): table of phones
        state_table (fst.SymbolTable): table of HMM phone-state IDs
    """
    word_table = fst.SymbolTable()
    phone_table = fst.SymbolTable()
    state_table = fst.SymbolTable()

    # Add the empty <eps> symbol to all tables first.
    for table in (state_table, phone_table, word_table):
        table.add_symbol('<eps>')

    for entry, pronunciations in lexicon.items():
        word_table.add_symbol(entry)
        # Walk the phones of all pronunciations of this word, in order.
        all_phones = (
            phone
            for pronunciation in pronunciations
            for phone in pronunciation
        )
        for phone in all_phones:
            phone_table.add_symbol(phone)
            # One state symbol per HMM state, numbered from 1 to n.
            for state_number in range(1, n + 1):
                state_table.add_symbol(f'{phone}_{state_number}')

    return word_table, phone_table, state_table
# We start by importing the OpenFst Python wrapper:

# In[1]:

import openfst_python as fst
import math

partner = "s1922482"  # Minghui Zhao

# Then we create tables for our symbols

# In[2]:

# A **SymbolTable()** is simply a table associating symbols and indexes.
# We add symbols to the table with the method **add_symbol()**.
#
# By convention, <eps> always has symbol zero, so it is listed first in
# each tuple below; the remaining symbols are added in the order given.
input_sym = fst.SymbolTable()
for _input_symbol in ('<eps>', 'a', 'b'):  # input symbols
    input_sym.add_symbol(_input_symbol)

output_sym = fst.SymbolTable()
for _output_symbol in ('<eps>', 'd', 'c'):  # output symbols
    output_sym.add_symbol(_output_symbol)

# Now that we've got our symbol tables, we will build the FST itself:
#================================================
# List of symbols
# You need to modify the list below to match
# the symbols you need. My advice would be:
#  (1) Draw the lattice on a piece of paper,
#  (2) Make a list of the transitions, and
#  (3) As you go through the transitions,
#      include your symbols in the symbol
#      list.
#
# Notice that the first symbol is the
# "epsilon", for when you expect empty strings.
#================================================
# Each symbol is registered with its position in this tuple as its key,
# so "<eps>" gets key 0, "c" gets key 1, and so on.
_FST_SYMBOL_LIST = (
    "<eps>",
    "c",
    "a",
    "t",
    "s",
    "-PL",
    "i",
    "e",
    "y-PL",
    "d",
    "o",
    "g",
    "y",
    "u",
    "-s",
)

fstSymbols = fst.SymbolTable()
for _symbol_key, _symbol_text in enumerate(_FST_SYMBOL_LIST):
    fstSymbols.add_symbol(_symbol_text, _symbol_key)