def gen_slm_dependencies(self, basename, N=3):
    """
    Generate the dependencies (slm, dictionary) for julius.

    Two files are written: C{basename + ".dict"} (pronunciation dictionary)
    and C{basename + ".arpa"} (statistical language model).

    The dictionary pairs each whitespace-separated item of C{self._tokens}
    with the item at the same position in C{self._phones}. Inside a
    pronunciation, "|" separates variants and "-" is replaced by a space.

    @param basename (str - IN) the base name of the slm file and of the
        dictionary file
    @param N (int) Language model N-gram length.

    """
    dict_filename = basename + ".dict"
    arpa_filename = basename + ".arpa"

    pronunciations = self._phones.split()
    words = self._tokens.split()

    # Build the pronunciation dictionary.
    # zip() stops at the shorter list: unpaired tokens/pronunciations are ignored.
    pron_dict = DictPron()
    for word, pronunciation in zip(words, pronunciations):
        for variant in pronunciation.split("|"):
            pron_dict.add_pron(word, variant.replace("-", " "))

    # Sentence boundary symbols get the "sil" pronunciation when the
    # dictionary reports them as unknown.
    for boundary in (START_SENT_SYMBOL, END_SENT_SYMBOL):
        if pron_dict.is_unk(boundary) is True:
            pron_dict.add_pron(boundary, "sil")

    # The meaning of the second argument (False) is defined by DictPron.
    pron_dict.save_as_ascii(dict_filename, False)

    # Write the SLM: self._tokens is given to the model as its only sentence.
    ngrams = NgramsModel(N)
    ngrams.append_sentences([self._tokens])
    log_probas = ngrams.probabilities(method="logml")

    writer = ArpaIO()
    writer.set(log_probas)
    writer.save(arpa_filename)
def load_from_arpa(self, filename):
    """
    Load the model from an ARPA-ASCII file.

    The value returned by C{ArpaIO.load} is stored in C{self.model},
    replacing any previous value.

    @param filename (str - IN) Filename from which to read the model.

    """
    self.model = ArpaIO().load(filename)
def save_as_arpa(self, filename):
    """
    Save the model into an ARPA-ASCII file.

    C{self.model} is handed to a new C{ArpaIO} instance, which writes it.

    @param filename (str - OUT) Filename in which to write the model.

    """
    writer = ArpaIO()
    writer.set(self.model)
    writer.save(filename)
def testARPA(self):
    """
    Check that an ARPA model survives a load/save/load round trip.

    A 3-gram model is counted from C{self.corpusfile}, its "logml"
    probabilities are written to a first ARPA file, that file is loaded
    and saved again through SLM, and both loaded models are compared.

    """
    first_path = os.path.join(TEMP, "model1.arpa")
    second_path = os.path.join(TEMP, "model2.arpa")

    # Reference file, written directly with ArpaIO.
    ngrams = NgramsModel(3)
    ngrams.count(self.corpusfile)
    log_probas = ngrams.probabilities("logml")

    writer = ArpaIO()
    writer.set(log_probas)
    writer.save(first_path)

    # Load the reference file, then write it back through SLM.
    first_slm = SLM()
    first_slm.load_from_arpa(first_path)
    first_slm.save_as_arpa(second_path)

    # Reload the rewritten file: both models are expected to be equivalent.
    second_slm = SLM()
    second_slm.load_from_arpa(second_path)

    self.assertTrue(compare(first_slm.model, second_slm.model))
# ----------------------------------------------------------------------------
# Main program
# ----------------------------------------------------------------------------
# NOTE: "args" is expected to be defined before this point (not shown here).

# ---------------------------------
# 1. Create a NgramsModel of order args.n; give it the vocabulary from
#    args.r only when that option holds a value.

model = NgramsModel(args.n)
if args.r:
    model.set_vocab(args.r)

# ---------------------------------
# 2. Estimate counts of each n-gram; every item of args.i is passed as a
#    separate positional argument.

model.count(*args.i)

# ---------------------------------
# 3. Estimate probabilities with the method given by args.m

probas = model.probabilities(args.m)

# ---------------------------------
# 4. Write the probabilities in an ARPA file named by args.o

arpaio = ArpaIO()
arpaio.set(probas)
arpaio.save(args.o)

# ---------------------------------------------------------------------------