Beispiel #1
0
 def test_basic_unigram_counts(self):
     """Check an additive relation between three unigram letter entries.

     Iterates over every letter yielded by the sample corpus, then builds
     a Unigram over the same file, runs ``frequency_model()`` and asserts
     that two entries of ``letter`` sum to a third entry.
     """
     sample_path = "data/ex.unicode"
     corpus = Corpus(sample_path)
     # The letters themselves are not inspected; the loop only drives the
     # iterator to completion.
     for _letter in corpus.next_tamil_letter():
         continue
     model = Unigram(sample_path)
     model.frequency_model()
     first = model.letter[u"ஷை"]
     second = model.letter[u"சி"]
     expected_total = model.letter[u"ந"]
     self.assertEqual(first + second, expected_total)
     del corpus, model
 def test_basic_unigram_counts(self):
     """Check an additive relation between three unigram letter entries.

     Prints each corpus letter and the resulting model when LINUX is
     truthy, then asserts that two entries of the model's ``letter``
     lookup sum to a third entry.
     """
     sample_path = "data/ex.unicode"
     corpus = Corpus(sample_path)
     for tamil_letter in corpus.next_tamil_letter():
         if LINUX:
             print(tamil_letter)

     model = LetterModels.Unigram(sample_path)
     model.frequency_model()
     # On the non-PYTHON3 path the model is converted with unicode()
     # before printing; otherwise it is printed directly.
     if PYTHON3:
         if LINUX:
             print(model)
     elif LINUX:
         print(unicode(model))
     combined = model.letter[u"ஷை"] + model.letter[u"சி"]
     assert combined == model.letter[u"ந"]
     del corpus, model
    def test_basic_unigram_counts(self):
        """Verify that two unigram letter entries add up to a third.

        When LINUX is truthy, every letter yielded by the corpus and the
        finished model are printed. The final assertion compares the sum
        of two ``letter`` entries against a third entry.
        """
        data_file = "data/ex.unicode"
        text_source = Corpus(data_file)
        for glyph in text_source.next_tamil_letter():
            if LINUX:
                print(glyph)

        unigram = LetterModels.Unigram(data_file)
        unigram.frequency_model()
        # PYTHON3 picks the print form: direct, or via unicode() otherwise.
        if PYTHON3:
            if LINUX:
                print(unigram)
        elif LINUX:
            print(unicode(unigram))
        lhs = unigram.letter[u"ஷை"] + unigram.letter[u"சி"]
        rhs = unigram.letter[u"ந"]
        assert lhs == rhs
        del text_source, unigram
Beispiel #4
0
 def test_basic_unigram_counts(self):
     """Check an additive relation between three unigram letter entries.

     Iterates over every letter yielded by the sample corpus, then builds
     a Unigram over the same file, runs ``frequency_model()`` and asserts
     that two entries of ``letter`` sum to a third entry.
     """
     z = Corpus("data/ex.unicode")
     # The yielded letters are not inspected; the loop is kept so the
     # iterator is still driven to completion as before.
     for _ in z.next_tamil_letter():
         pass
     q = Unigram("data/ex.unicode")
     q.frequency_model()
     # The former PYTHON3 conditional had two empty arms (its debug prints
     # were commented out), so it has been removed as dead code.
     self.assertEqual(q.letter[u"ஷை"] + q.letter[u"சி"], q.letter[u"ந"])
     del z, q
def run(parent, outputfile):
    """Build one unigram model from every ``*.word`` file in a directory.

    A single Unigram object is created from the first matching file; for
    each later file its ``corpus`` attribute is replaced with a new Corpus
    and ``frequency_model()`` is run again on the same object (presumably
    accumulating across files -- confirm against Unigram). The model is
    then saved to ``outputfile`` and ``proc_stats`` is called with the
    result of ``get_prob(model.letter)`` and the same output path.

    Args:
        parent: Directory searched (non-recursively) for ``*.word`` files.
        outputfile: Path passed to ``save()`` and to ``proc_stats()``.

    Raises:
        ValueError: If ``parent`` contains no ``*.word`` files. Previously
            this case failed with an AttributeError on ``None``.
    """
    filenames = glob.glob(os.path.join(parent, "*.word"))
    if not filenames:
        # Fail early with a clear message instead of calling .save() on None.
        raise ValueError("no *.word files found in %r" % (parent,))

    model = None
    for filename in filenames:
        # Identity check: do not depend on the truthiness of the model object.
        if model is None:
            model = Unigram(filename)
        else:
            model.corpus = Corpus(filename)  # point the model at the next file
        model.frequency_model()

    model.save(outputfile)
    proc_stats(get_prob(model.letter), outputfile)