Exemple #1
0
 def testUnion(self):
     """Check the result of merging two dictionaries with s3dict.union.

     Opens ``foo.dict`` and ``bar.dict`` from ``self.basedir``, merges them,
     and verifies that the merged dictionary returns the same entry for
     'ACTUALLY' as ``foo.dict`` does and that it contains the words
     'ABANDONED', 'ZONES' and 'ZSWANG'.
     """
     foodict = s3dict.open(os.path.join(self.basedir, "foo.dict"))
     bardict = s3dict.open(os.path.join(self.basedir, "bar.dict"))
     bazdict = s3dict.union(foodict, bardict)
     # assertEqual/assertTrue replace the deprecated assertEquals/assert_
     # aliases (removed in Python 3.12, but available in Python 2 too).
     self.assertEqual(foodict['ACTUALLY'], bazdict['ACTUALLY'])
     for word in ('ABANDONED', 'ZONES', 'ZSWANG'):
         # The message names the missing word so a failure is self-explanatory.
         self.assertTrue(word in bazdict,
                         "%s missing from union dictionary" % word)
Exemple #2
0
 def testRead(self):
     """Check reading of ``foo.dict`` and the lookup API of the result.

     Verifies the phone set, ``get_phones`` / ``get_alt_phones`` results,
     the exceptions raised for unknown words and out-of-range alternate
     pronunciations, and the equivalent subscript-style lookups.
     """
     foodict = s3dict.open(os.path.join(self.basedir, "foo.dict"))
     self.assertTrue('AH' in foodict.phoneset)
     self.assertEqual(foodict.get_phones('A'), ['AH'])
     self.assertEqual(foodict.get_alt_phones('A', 2), ['EY'])
     self.assertEqual(foodict.get_phones('ZSWANG'), ['S', 'W', 'AE', 'NG'])

     # Lookups that are expected to fail.  assertRaises fails the test if
     # no exception is raised and lets any other exception propagate,
     # which is what the previous try/except/else blocks did by hand.
     self.assertRaises(KeyError, foodict.get_phones, 'QRXG')
     self.assertRaises(IndexError, foodict.get_alt_phones, 'A', 3)
     self.assertRaises(KeyError, foodict.get_alt_phones, '!@#$!@', 3)

     # Subscript access: plain word, (word, alternate) tuple, and the
     # "WORD(n)" string form.
     self.assertEqual(foodict['A'], ['AH'])
     self.assertEqual(foodict['A', 2], ['EY'])
     self.assertEqual(foodict['A(2)'], ['EY'])
     self.assertEqual(foodict['ZSWANG'], ['S', 'W', 'AE', 'NG'])
Exemple #3
0
Generate a single-pronunciation dictionary from an input dictionary
and the output of force alignment.
"""

__author__ = "David Huggins-Daines <*****@*****.**>"
__version__ = "$Revision $"

from collections import defaultdict
import s3dict
import sys

if __name__ == "__main__":
    if len(sys.argv) < 3:
        print>>sys.stderr, "Usage: %s INDICT FALIGNOUT [OUTDICT]" % sys.argv[0]
        sys.exit(1)
    indict = s3dict.open(sys.argv[1])
    counts = defaultdict(int)
    falignout = file(sys.argv[2])
    for spam in falignout:
        for word in spam.split()[:-1]:
            if word in indict:
                counts[word] += 1
    words = list(indict.words())
    words.sort()
    if len(sys.argv) > 3:
        outfh = file(sys.argv[3], "w")
    else:
        outfh = sys.stdout
    for w in words:
        alts = sum(1 for x in indict.alts(w))
        if alts == 1: