Example #1
0
    def add_from_dict(self, dictfilename):
        """
        Append every phoneme found in a pronunciation dictionary.

        @param dictfilename (str) is the name of an HTK-ASCII pronunciation dictionary

        """
        pronunciations = DictPron( dictfilename ).get_dict()
        # each entry is "variant1|variant2|..." with phonemes joined by "-"
        for pron in pronunciations.values():
            for variant in pron.split("|"):
                for phoneme in variant.split("-"):
                    self.add( phoneme )
Example #2
0
 def test_save(self):
     # a dictionary saved then reloaded must keep every pronunciation
     copyname = DICT_FRA+".copy"
     original = DictPron( DICT_FRA )
     original.save_as_ascii( copyname )
     reloaded = DictPron( copyname, nodump=True )
     for entry in original.get_keys():
         self.assertEqual( original.get_pron(entry), reloaded.get_pron(entry) )
     os.remove( copyname )
Example #3
0
    def gen_dependencies(self, grammarname, dictname):
        """
        Create the grammar and dictionary files required by HVite.

        @param grammarname is the file name of the tokens
        @param dictname is the dictionary file name

        """
        pdict = DictPron()

        with codecs.open(grammarname, 'w', encoding) as flab:
            for token, pron in zip(self._tokens.split(), self._phones.split()):
                # register every pronunciation variant of the token
                for variant in pron.split("|"):
                    pdict.add_pron( token, variant.replace("-"," ") )
                    # optionally append a variant ending with a silence
                    if self._infersp is True:
                        pdict.add_pron( token, (variant + '-sil').replace("-"," ") )
                # the lab file holds one token per line
                flab.write( token+"\n")

        pdict.save_as_ascii( dictname )
Example #4
0
 def test_dict(self):
     # known words are found, unknown words map to the UNK symbol
     pron = DictPron( DICT_FRA )
     self.assertTrue( pron.is_unk('azerty') )
     for known in ('il_y_a', u'ĂȘtre'):
         self.assertFalse( pron.is_unk(known) )
     self.assertEqual( pron.get_pron(u'sil'), "s.i.l" )
     self.assertEqual( pron.get_pron(u'azerty'), "UNK" )
Example #5
0
    def test_phonetizeFR(self):
        # phonetization of an out-of-dictionary compound, with and without
        # the unknown-word phonetizer enabled
        dictfile = os.path.join(SPPAS, "resources", "dict", "fra.dict")
        phonetizer = DictPhon(DictPron(dictfile))

        self.assertEqual(phonetizer.phonetize('pas_encore', phonunk=False),
                         'UNK')
        self.assertEqual(phonetizer.phonetize('pas_encore', phonunk=True),
                         "p.a.a~.k.o.r|p.a.z.a~.k.o.r|p.a.a~.k.o.r.eu|p.a.z.a~.k.o.r.eu")

        # a slash-delimited word is returned untouched in both modes
        for unk in (True, False):
            self.assertEqual(phonetizer.phonetize(u'/lemot/', phonunk=unk),
                             u"lemot")
Example #6
0
    def gen_slm_dependencies(self, basename, N=3):
        """
        Create the dictionary and the statistical language model for julius.

        @param basename (str - IN) the base name of the slm file and of the dictionary file
        @param N (int) Language model N-gram length.

        """
        dictname = basename + ".dict"
        slmname  = basename + ".arpa"

        # build the pronunciation dictionary from the token/phone pairs
        pdict = DictPron()
        for token, pron in zip(self._tokens.split(), self._phones.split()):
            for variant in pron.split("|"):
                pdict.add_pron( token, variant.replace("-"," ") )

        # sentence boundary symbols must always be present
        for symbol in (START_SENT_SYMBOL, END_SENT_SYMBOL):
            if pdict.is_unk(symbol) is True:
                pdict.add_pron( symbol, "sil" )

        pdict.save_as_ascii( dictname, False )

        # estimate and save the N-gram language model in ARPA format
        slm = NgramsModel(N)
        slm.append_sentences( [self._tokens] )
        arpaio = ArpaIO()
        arpaio.set( slm.probabilities( method="logml" ) )
        arpaio.save( slmname )