Example #1
0
def compile(words, dict_filename, langmodel_filename):
    """
    Writes the given list of words out as a pronunciation dictionary and an
    ARPA language model, for use as parameters in Mic.

    The words are upper-cased and de-duplicated. Their pronunciations come
    from g2p.translateWords and are written to dict_filename as one
    "WORD PRONUNCIATION" line per word. The same words are written to a
    temporary sentences file, which is then passed to the external
    `text2idngram` and `idngram2lm` tools to produce langmodel_filename.

    Arguments:
    words -- iterable of word strings making up the vocabulary
    dict_filename -- path the dictionary file is written to
    langmodel_filename -- path the language model file is written to

    Returns True when both external tools exit with status 0, and False when
    either of them fails or cannot be started.
    """
    # Imported here so the function does not depend on a module-level import.
    import subprocess

    sentences_file = tmp_path('sentences.txt')
    idgram_file = tmp_path('temp.idgram')

    # Upper-case before de-duplicating so words that differ only in case
    # collapse into a single entry.
    words = list(set(w.upper() for w in words))

    # create the dictionary
    pronounced = g2p.translateWords(words)
    lines = ["%s %s" % (word, phones)
             for word, phones in zip(words, pronounced)]

    with open(dict_filename, "w") as f:
        f.write("\n".join(lines) + "\n")

    # create the language model input (the with-block closes the file)
    with open(sentences_file, "w") as f:
        f.write("\n".join(words) + "\n")
        f.write("<s> \n </s> \n")

    # make language model
    # Argument lists are used instead of shell strings so that paths with
    # spaces or shell metacharacters reach the tools unmodified.
    try:
        # text2idngram reads its text from stdin; the sentences file serves
        # as both the vocabulary and the text.
        with open(sentences_file) as corpus:
            idngram_status = subprocess.call(
                ["text2idngram", "-vocab", sentences_file,
                 "-idngram", idgram_file],
                stdin=corpus)
        lm_status = subprocess.call(
            ["idngram2lm", "-idngram", idgram_file,
             "-vocab", sentences_file, "-arpa", langmodel_filename])
    except OSError:
        # Raised when a tool is not installed or is not executable.
        return False
    return idngram_status == 0 and lm_status == 0
Example #2
0
def compile(words, dict_filename, langmodel_filename):
    """
    Writes the given list of words out as a pronunciation dictionary and an
    ARPA language model, for use as parameters in Mic.

    The words are upper-cased and de-duplicated. Their pronunciations come
    from g2p.translateWords and are written to dict_filename as one
    "WORD PRONUNCIATION" line per word. The same words are written to a
    temporary sentences file, which is then passed to the external
    `text2idngram` and `idngram2lm` tools to produce langmodel_filename.

    Arguments:
    words -- iterable of word strings making up the vocabulary
    dict_filename -- path the dictionary file is written to
    langmodel_filename -- path the language model file is written to

    Returns True when both external tools exit with status 0, and False when
    either of them fails or cannot be started.
    """
    # Imported here so the function does not depend on a module-level import.
    import subprocess

    sentences_file = tmp_path('sentences.txt')
    idgram_file = tmp_path('temp.idgram')

    # Upper-case before de-duplicating so words that differ only in case
    # collapse into a single entry.
    words = list(set(w.upper() for w in words))

    # create the dictionary
    pronounced = g2p.translateWords(words)
    lines = ["%s %s" % (word, phones)
             for word, phones in zip(words, pronounced)]

    with open(dict_filename, "w") as f:
        f.write("\n".join(lines) + "\n")

    # create the language model input (the with-block closes the file)
    with open(sentences_file, "w") as f:
        f.write("\n".join(words) + "\n")
        f.write("<s> \n </s> \n")

    # make language model
    # Argument lists are used instead of shell strings so that paths with
    # spaces or shell metacharacters reach the tools unmodified.
    try:
        # text2idngram reads its text from stdin; the sentences file serves
        # as both the vocabulary and the text.
        with open(sentences_file) as corpus:
            idngram_status = subprocess.call(
                ["text2idngram", "-vocab", sentences_file,
                 "-idngram", idgram_file],
                stdin=corpus)
        lm_status = subprocess.call(
            ["idngram2lm", "-idngram", idgram_file,
             "-vocab", sentences_file, "-arpa", langmodel_filename])
    except OSError:
        # Raised when a tool is not installed or is not executable.
        return False
    return idngram_status == 0 and lm_status == 0
Example #3
0
File: g2p.py Project: justjake/ear
import os
import subprocess
import re

from config import data_path, tmp_path, G014B2B_FST


# Scratch file that translateWords writes its newline-joined input words to.
TEMP_FILENAME = tmp_path('g2ptemp')
# Captures the text between "<s> " and " </s>" on a single line ("." does not
# match newlines).  Presumably this is how phonetisaurus-g2p delimits a
# phoneme sequence in its output -- TODO confirm.
PHONE_MATCH = re.compile(r'<s> (.*) </s>')


def parseLine(line):
    """
    Return the text found between "<s> " and " </s>" in line.

    Arguments:
    line -- string expected to contain a PHONE_MATCH match

    Raises ValueError when line contains no "<s> ... </s>" span.
    """
    match = PHONE_MATCH.search(line)
    if match is None:
        # search() yields None when nothing matches; report that clearly
        # rather than failing with an AttributeError on None.
        raise ValueError("no '<s> ... </s>' span found in %r" % (line,))
    return match.group(1)


def parseOutput(output):
    """
    Collect the captured text of every PHONE_MATCH match in output.

    Returns a list of strings in order of appearance; the list is empty when
    nothing matches.
    """
    return [found.group(1) for found in PHONE_MATCH.finditer(output)]


def translateWord(word):
    """
    Run phonetisaurus-g2p on a single word and return the parsed result.

    The tool is invoked with the G014B2B_FST model and the word as its
    input; its captured output is handed to parseLine.
    """
    model_flag = '--model=%s' % (G014B2B_FST)
    input_flag = '--input=%s' % word
    command = ['phonetisaurus-g2p', model_flag, input_flag]
    raw_output = subprocess.check_output(command)
    return parseLine(raw_output)


def translateWords(words):
    full_text = '\n'.join(words)

    f = open(TEMP_FILENAME, "wb")
    f.write(full_text)
    f.flush()
Example #4
0
File: g2p.py Project: justjake/ear
import os
import subprocess
import re

from config import data_path, tmp_path, G014B2B_FST

# Scratch file that translateWords opens for writing.
TEMP_FILENAME = tmp_path('g2ptemp')
# Captures the text between "<s> " and " </s>" on a single line ("." does not
# match newlines).  Presumably this is how phonetisaurus-g2p delimits a
# phoneme sequence in its output -- TODO confirm.
PHONE_MATCH = re.compile(r'<s> (.*) </s>')


def parseLine(line):
    """
    Return the text found between "<s> " and " </s>" in line.

    Arguments:
    line -- string expected to contain a PHONE_MATCH match

    Raises ValueError when line contains no "<s> ... </s>" span.
    """
    match = PHONE_MATCH.search(line)
    if match is None:
        # search() yields None when nothing matches; report that clearly
        # rather than failing with an AttributeError on None.
        raise ValueError("no '<s> ... </s>' span found in %r" % (line,))
    return match.group(1)


def parseOutput(output):
    """
    Collect the captured text of every PHONE_MATCH match in output.

    Returns a list of strings in order of appearance; the list is empty when
    nothing matches.
    """
    return [found.group(1) for found in PHONE_MATCH.finditer(output)]


def translateWord(word):
    """
    Run phonetisaurus-g2p on a single word and return the parsed result.

    The tool is invoked with the G014B2B_FST model and the word as its
    input; its captured output is handed to parseLine.
    """
    model_flag = '--model=%s' % (G014B2B_FST)
    input_flag = '--input=%s' % word
    command = ['phonetisaurus-g2p', model_flag, input_flag]
    raw_output = subprocess.check_output(command)
    return parseLine(raw_output)


def translateWords(words):
    full_text = '\n'.join(words)

    f = open(TEMP_FILENAME, "wb")