def test_basic_unigram_counts(self):
    # Smoke-check the corpus reader: drain the letter iterator without
    # inspecting (or printing) the individual letters.
    corpus = Corpus("data/ex.unicode")
    for _ in corpus.next_tamil_letter():
        continue

    # Build the unigram model over the same file; the check below reads its
    # per-letter table after frequency_model() has run.
    model = Unigram("data/ex.unicode")
    model.frequency_model()

    # The entries for these two letters must add up to the entry for the third.
    combined = model.letter[u"ஷை"] + model.letter[u"சி"]
    self.assertEqual(combined, model.letter[u"ந"])
    del corpus, model
def test_basic_unigram_counts(self):
    # Walk every letter the corpus yields, echoing each one only when the
    # LINUX flag is set.
    corpus = Corpus("data/ex.unicode")
    for tamil_letter in corpus.next_tamil_letter():
        if LINUX:
            print(tamil_letter)

    model = LetterModels.Unigram("data/ex.unicode")
    model.frequency_model()

    # Dump the model when LINUX is set; without PYTHON3 it is passed through
    # unicode() before printing.
    if PYTHON3:
        if LINUX:
            print(model)
    elif LINUX:
        print(unicode(model))

    # The entries for these two letters must add up to the entry for the third.
    assert (
        model.letter[u"ஷை"] + model.letter[u"சி"] == model.letter[u"ந"]
    )
    del corpus, model
def test_basic_unigram_counts(self):
    # Iterate the full corpus; each letter is printed only when LINUX is set.
    reader = Corpus("data/ex.unicode")
    for current in reader.next_tamil_letter():
        if LINUX:
            print(current)

    unigram = LetterModels.Unigram("data/ex.unicode")
    unigram.frequency_model()

    # Optional dump of the model, gated on LINUX. The PYTHON3 flag selects
    # whether the object is printed directly or via unicode().
    if PYTHON3:
        if LINUX:
            print(unigram)
    elif LINUX:
        print(unicode(unigram))

    # Sum of the first two letters' entries must equal the third letter's.
    assert (
        unigram.letter[u"ஷை"] + unigram.letter[u"சி"]
        == unigram.letter[u"ந"]
    )
    del reader, unigram
def test_basic_unigram_counts(self):
    # Smoke-check the corpus reader by draining its letter iterator; the
    # letters themselves are not inspected.
    z = Corpus("data/ex.unicode")
    for _ in z.next_tamil_letter():
        pass

    # Build the unigram model over the same file. The former PYTHON3
    # if/else only wrapped disabled debug prints (both arms were `pass`),
    # so it has been removed.
    q = Unigram("data/ex.unicode")
    q.frequency_model()

    # The entries for these two letters must add up to the entry for the third.
    self.assertEqual(q.letter[u"ஷை"] + q.letter[u"சி"], q.letter[u"ந"])
def run(parent, outputfile):
    """Build one unigram model from every ``*.word`` file in *parent* and save it.

    A single ``Unigram`` is created for the first matching file and then
    re-pointed at each following file via its ``corpus`` attribute;
    ``frequency_model()`` is invoked once per file on that same object
    (presumably accumulating counts in ``x.letter`` -- confirm against
    ``Unigram``). The model is then saved to *outputfile*, and
    ``proc_stats(get_prob(x.letter), outputfile)`` is called.

    Args:
        parent: Directory searched (non-recursively) for ``*.word`` files.
        outputfile: Path handed to ``Unigram.save`` and ``proc_stats``.

    Returns:
        None.

    Raises:
        ValueError: If *parent* contains no ``*.word`` files. Previously this
            case failed with an ``AttributeError`` on ``None``.
    """
    x = None
    for filename in glob.glob(os.path.join(parent, "*.word")):
        # Compare against None explicitly: a model object that happens to be
        # falsy must not be thrown away and rebuilt for every file.
        if x is None:
            x = Unigram(filename)
        else:
            x.corpus = Corpus(filename)  # reuse the model, switch input file
        x.frequency_model()

    if x is None:
        # Nothing matched, so there is no model to save; fail with a clear
        # message before touching the output path.
        raise ValueError("no '*.word' files found in %r" % (parent,))

    x.save(outputfile)
    proc_stats(get_prob(x.letter), outputfile)
    return