def compile(words, dict_filename, langmodel_filename):
    """
    Write a pronunciation dictionary and an ARPA language model for `words`.

    The words are upper-cased and de-duplicated, their pronunciations are
    obtained from g2p.translateWords, and the external `text2idngram` and
    `idngram2lm` commands are run through the shell to build the model.

    Arguments:
    words -- iterable of word strings
    dict_filename -- path that receives one "WORD PRONUNCIATION" line per word
    langmodel_filename -- path handed to idngram2lm as its -arpa output

    Returns True unconditionally; the exit statuses of the external commands
    are not inspected.
    """
    # shlex.quote exists on Python 3 only; pipes.quote is the Python 2 name.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    sentences_file = tmp_path('sentences.txt')
    idgram_file = tmp_path('temp.idgram')

    # Normalise case, then drop duplicates. Set order is arbitrary, but the
    # same list feeds both the dictionary and the vocabulary, so they agree.
    words = list(set(w.upper() for w in words))

    # create the dictionary
    pronounced = g2p.translateWords(words)
    lines = ["%s %s" % (word, phones)
             for word, phones in zip(words, pronounced)]
    with open(dict_filename, "w") as f:
        f.write("\n".join(lines) + "\n")

    # create the language model input; the with-block closes the file, so no
    # explicit close() is needed
    with open(sentences_file, "w") as f:
        f.write("\n".join(words) + "\n")
        f.write("<s> \n </s> \n")

    # make language model
    # Paths are shell-quoted so that spaces or shell metacharacters in them
    # cannot split or alter the command line. os.system is kept on purpose:
    # a missing tool yields a non-zero status rather than an exception.
    quoted_sentences = quote(sentences_file)
    quoted_idgram = quote(idgram_file)
    os.system(
        "text2idngram -vocab {sentences} < {sentences} -idngram {idgram}"
        .format(sentences=quoted_sentences, idgram=quoted_idgram))
    os.system(
        "idngram2lm -idngram {idgram} -vocab {sentences} -arpa {langmodel}"
        .format(idgram=quoted_idgram, sentences=quoted_sentences,
                langmodel=quote(langmodel_filename)))

    return True
def compile(words, dict_filename, langmodel_filename):
    """
    Write a pronunciation dictionary and an ARPA language model for `words`.

    The words are upper-cased and de-duplicated, their pronunciations are
    obtained from g2p.translateWords, and the external `text2idngram` and
    `idngram2lm` commands are run through the shell to build the model.

    Arguments:
    words -- iterable of word strings
    dict_filename -- path that receives one "WORD PRONUNCIATION" line per word
    langmodel_filename -- path handed to idngram2lm as its -arpa output

    Returns True unconditionally; the exit statuses of the external commands
    are not inspected.
    """
    # shlex.quote exists on Python 3 only; pipes.quote is the Python 2 name.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    sentences_file = tmp_path('sentences.txt')
    idgram_file = tmp_path('temp.idgram')

    # Normalise case, then drop duplicates. Set order is arbitrary, but the
    # same list feeds both the dictionary and the vocabulary, so they agree.
    words = list(set(w.upper() for w in words))

    # create the dictionary
    pronounced = g2p.translateWords(words)
    lines = ["%s %s" % (word, phones)
             for word, phones in zip(words, pronounced)]
    with open(dict_filename, "w") as f:
        f.write("\n".join(lines) + "\n")

    # create the language model input; the with-block closes the file, so no
    # explicit close() is needed
    with open(sentences_file, "w") as f:
        f.write("\n".join(words) + "\n")
        f.write("<s> \n </s> \n")

    # make language model
    # Paths are shell-quoted so that spaces or shell metacharacters in them
    # cannot split or alter the command line. os.system is kept on purpose:
    # a missing tool yields a non-zero status rather than an exception.
    quoted_sentences = quote(sentences_file)
    quoted_idgram = quote(idgram_file)
    os.system(
        "text2idngram -vocab {sentences} < {sentences} -idngram {idgram}"
        .format(sentences=quoted_sentences, idgram=quoted_idgram))
    os.system(
        "idngram2lm -idngram {idgram} -vocab {sentences} -arpa {langmodel}"
        .format(idgram=quoted_idgram, sentences=quoted_sentences,
                langmodel=quote(langmodel_filename)))

    return True
import os
import subprocess
import re
from config import data_path, tmp_path, G014B2B_FST

# Path of the scratch file that translateWords writes its input to.
TEMP_FILENAME = tmp_path('g2ptemp')

# Captures whatever sits between a "<s> " marker and a " </s>" marker.
# "." does not match a newline here, so a capture never spans lines.
PHONE_MATCH = re.compile(r'<s> (.*) </s>')


def parseLine(line):
    """
    Return the text captured by PHONE_MATCH from its first match in `line`.

    Raises AttributeError when `line` contains no match, because the search
    then returns None.
    """
    return PHONE_MATCH.search(line).group(1)


def parseOutput(output):
    """
    Return a list with the PHONE_MATCH capture of every match in `output`.

    The list is empty when nothing matches.
    """
    return PHONE_MATCH.findall(output)


def translateWord(word):
    """
    Run the external `phonetisaurus-g2p` command for a single word.

    The command receives G014B2B_FST as its --model and `word` as its
    --input; the captured output is passed through parseLine and returned.

    Raises subprocess.CalledProcessError if the command exits non-zero.
    """
    # NOTE(review): parseLine applies a text pattern, so this assumes
    # check_output returns text (Python 2 str) -- confirm the interpreter.
    out = subprocess.check_output(['phonetisaurus-g2p',
                                   '--model=%s' % (G014B2B_FST),
                                   '--input=%s' % word])
    return parseLine(out)


def translateWords(words):
    # NOTE(review): this definition continues past the visible excerpt; the
    # comments below describe only the statements shown here.
    # One word per line.
    full_text = '\n'.join(words)

    # NOTE(review): the file is opened in binary mode but written with a text
    # string, which only works on Python 2 -- confirm the target interpreter.
    f = open(TEMP_FILENAME, "wb")
    f.write(full_text)
    # Push buffered data out to the file.
    f.flush()
import os
import subprocess
import re
from config import data_path, tmp_path, G014B2B_FST

# Path of the scratch file opened by translateWords.
TEMP_FILENAME = tmp_path('g2ptemp')

# Captures whatever sits between a "<s> " marker and a " </s>" marker.
# "." does not match a newline here, so a capture never spans lines.
PHONE_MATCH = re.compile(r'<s> (.*) </s>')


def parseLine(line):
    """
    Return the text captured by PHONE_MATCH from its first match in `line`.

    Raises AttributeError when `line` contains no match, because the search
    then returns None.
    """
    return PHONE_MATCH.search(line).group(1)


def parseOutput(output):
    """
    Return a list with the PHONE_MATCH capture of every match in `output`.

    The list is empty when nothing matches.
    """
    return PHONE_MATCH.findall(output)


def translateWord(word):
    """
    Run the external `phonetisaurus-g2p` command for a single word.

    The command receives G014B2B_FST as its --model and `word` as its
    --input; the captured output is passed through parseLine and returned.

    Raises subprocess.CalledProcessError if the command exits non-zero.
    """
    # NOTE(review): parseLine applies a text pattern, so this assumes
    # check_output returns text (Python 2 str) -- confirm the interpreter.
    out = subprocess.check_output([
        'phonetisaurus-g2p',
        '--model=%s' % (G014B2B_FST),
        '--input=%s' % word
    ])
    return parseLine(out)


def translateWords(words):
    # NOTE(review): this definition continues past the visible excerpt; the
    # comments below describe only the statements shown here.
    # One word per line.
    full_text = '\n'.join(words)

    # Opened for binary writing; what is written is not visible here.
    f = open(TEMP_FILENAME, "wb")