def generate_phonemes(self):
    """
    Step 3. Generate phoneme list from word list.

    Map phonemes to single characters to simplify encoding issues.

    Collects every distinct phoneme found in ``self.dic`` (in order of
    first appearance), passes that list to ``charmap.create_char_maps``
    and stores the two returned maps as ``self.pmap_c2s`` and
    ``self.pmap_s2c``.  Each word's ``phonemes`` list is then rewritten
    in place by looking every entry up in ``self.pmap_s2c``.  Finally
    ``self.phonemes`` is set to a list of the keys of ``self.pmap_c2s``.

    Raises whatever the ``self.pmap_s2c`` lookup raises if a phoneme is
    missing from the map (a ``KeyError`` if it is a plain dict).
    """
    self.log.debug("G2P.generate_phonemes()")

    # Order-preserving de-duplication.  The set gives O(1) membership
    # checks; phonemes are already used as keys of pmap_s2c below, so
    # they are hashable.
    phones = []
    seen = set()
    for w in self.dic:
        for p in w.phonemes:
            if p not in seen:
                seen.add(p)
                phones.append(p)

    (self.pmap_c2s, self.pmap_s2c) = charmap.create_char_maps(phones)

    # Replace each phoneme by its mapped value, keeping the same list
    # object so other references to w.phonemes see the update.
    for w in self.dic:
        for i, p in enumerate(w.phonemes):
            w.phonemes[i] = self.pmap_s2c[p]

    # Materialize the keys: on Python 3 dict.keys() is a live view that
    # cannot be indexed and changes with the dict.  A real list matches
    # what generate_graphemes() stores in self.graphemes.
    self.phonemes = list(self.pmap_c2s.keys())
def generate_graphemes(self):
    """
    Step 2. Build the grapheme inventory from the word list.

    Graphemes are mapped to single characters to simplify encoding
    issues.

    Every distinct grapheme in ``self.dic`` is gathered in order of
    first appearance and handed to ``charmap.create_char_maps``; the two
    maps it returns are kept as ``self.gmap_c2s`` and ``self.gmap_s2c``.
    Each word's ``graphemes`` list is rewritten in place through
    ``self.gmap_s2c``.  ``self.graphemes`` ends up as a list starting
    with the null grapheme ``'0'`` followed by the keys of
    ``self.gmap_c2s``.
    """
    self.log.debug("G2P.generate_graphemes()")

    # Distinct graphemes, in the order they are first encountered.
    inventory = []
    for word in self.dic:
        for symbol in word.graphemes:
            if symbol in inventory:
                continue
            inventory.append(symbol)

    char_maps = charmap.create_char_maps(inventory)
    self.gmap_c2s, self.gmap_s2c = char_maps

    # Swap every grapheme for its mapped value without replacing the
    # list object itself.
    for word in self.dic:
        for pos, symbol in enumerate(word.graphemes):
            word.graphemes[pos] = self.gmap_s2c[symbol]

    # Add null grapheme possibility.
    # NOTE(review): assumes create_char_maps never hands out '0' as a
    # mapped character -- confirm against charmap.
    alphabet = ['0']
    alphabet.extend(self.gmap_c2s.keys())
    self.graphemes = alphabet