コード例 #1
0
ファイル: test_stem.py プロジェクト: manu-chroma/cltk
 def test_syllabify(self):
     """Verify orthographic syllabification of a Hindi word.

     The word 'नमस्ते' is expected to come back as three syllables.
     """
     word = 'नमस्ते'
     expected_syllables = ['न', 'म', 'स्ते']
     # Build the Hindi syllabifier and split the word in a single expression.
     actual_syllables = IndianSyllabifier('hindi').orthographic_syllabify(word)
     self.assertEqual(actual_syllables, expected_syllables)
コード例 #2
0
ファイル: test_stem.py プロジェクト: vierth/cltk
 def test_syllabify(self):
     """Check that the Indic syllabifier splits 'नमस्ते' as expected for Hindi."""
     hindi_syllabifier = IndianSyllabifier('hindi')
     # Compare the syllabifier's output against the known three-syllable split.
     self.assertEqual(
         hindi_syllabifier.orthographic_syllabify('नमस्ते'),
         ['न', 'म', 'स्ते'],
     )
コード例 #3
0
    infile.close()
    outfile = open(outfilename, "w")

    for shlok in filestring:

        #picking one shloka from the file
        t_shlok = tokenizer.tokenize(shlok)
        #initializing the flags
        count = 0  # to count the number of phonemes after which the split has to be done
        pos = 0  # to insert the -
        diff = 0  # to keep track of the overflow phonemes

        for i in range(len(t_shlok)):
            token = t_shlok[i]
            split = syl.orthographic_syllabify(token)
            l = len(split)

            # phonemes already covered
            prev = count

            #checking for purna-viram and numbers
            if l == 1 and check_token(token) == False:
                diff = pos = count = 0
                continue

            # more phonemes added
            count = count + l

            # word extends the meter length
            if count > 8: