Example #1
0
    def gen_slm_dependencies(self, basename, N=3):
        """
        Write the two files julius depends on: a dictionary and an SLM.

        The dictionary is saved as basename + ".dict" and the language
        model as basename + ".arpa".

        Tokens and phonetizations are paired by position. Inside one
        phonetization, "|" separates the pronunciation variants and "-"
        separates the phones of a variant.

        @param basename (str - IN) the base name of the slm file and of the dictionary file
        @param N (int) Language model N-gram length.

        """
        dict_path = basename + ".dict"
        arpa_path = basename + ".arpa"

        phonetizations = self._phones.split()
        words = self._tokens.split()

        # Build the pronunciation dictionary, one entry per variant.
        pronunciations = DictPron()
        for word, phonetization in zip(words, phonetizations):
            for variant in phonetization.split("|"):
                pronunciations.add_pron(word, variant.replace("-", " "))

        # Sentence boundary symbols get a "sil" pronunciation when they
        # are still unknown to the dictionary.
        for boundary in (START_SENT_SYMBOL, END_SENT_SYMBOL):
            if pronunciations.is_unk(boundary) is True:
                pronunciations.add_pron(boundary, "sil")

        pronunciations.save_as_ascii(dict_path, False)

        # Estimate the N-gram model from the tokens and write it as ARPA.
        ngrams = NgramsModel(N)
        ngrams.append_sentences([self._tokens])
        log_probs = ngrams.probabilities(method="logml")

        writer = ArpaIO()
        writer.set(log_probs)
        writer.save(arpa_path)
Example #2
0
    def load_from_arpa(self, filename):
        """
        Read an ARPA-ASCII file and keep its content as this object's model.

        The value returned by ArpaIO.load() is stored in self.model.

        @param filename (str - IN) Filename from which to read the model.

        """
        self.model = ArpaIO().load(filename)
Example #3
0
    def save_as_arpa(self, filename):
        """
        Write this object's model (self.model) to an ARPA-ASCII file.

        @param filename (str - OUT) Filename in which to write the model.

        """
        writer = ArpaIO()
        writer.set(self.model)
        writer.save(filename)
Example #4
0
    def testARPA(self):
        """
        Check that a model is unchanged by an SLM load/save round trip.

        A 3-gram model estimated from the corpus file is written to a first
        ARPA file, loaded into an SLM, saved to a second ARPA file and loaded
        again; both loaded models must compare as equal.

        """
        first_path = os.path.join(TEMP, "model1.arpa")
        second_path = os.path.join(TEMP, "model2.arpa")

        # Reference ARPA file, estimated from the test corpus.
        ngrams = NgramsModel(3)
        ngrams.count(self.corpusfile)
        log_probs = ngrams.probabilities("logml")
        writer = ArpaIO()
        writer.set(log_probs)
        writer.save(first_path)

        # Round trip: load the reference, save a copy, load the copy.
        original = SLM()
        original.load_from_arpa(first_path)
        original.save_as_arpa(second_path)

        reloaded = SLM()
        reloaded.load_from_arpa(second_path)

        self.assertTrue(compare(original.model, reloaded.model))
Example #5
0
# ----------------------------------------------------------------------------
# Main program: build an n-gram model, then export it to an ARPA file
# ----------------------------------------------------------------------------

# Step 1 - create the model with the order given by args.n; when args.r is
# set, hand it to the model as its vocabulary.
model = NgramsModel(args.n)
if args.r:
    model.set_vocab(args.r)

# Step 2 - count the n-grams; every entry of args.i is passed as a separate
# positional argument.
model.count(*args.i)

# Step 3 - turn the counts into probabilities with the method named by args.m.
probas = model.probabilities(args.m)

# Step 4 - write the probabilities to the ARPA file named by args.o.
arpaio = ArpaIO()
arpaio.set(probas)
arpaio.save(args.o)

# ---------------------------------------------------------------------------