Python is_tashkeel 예제들, pyarabic.araby.is_tashkeel Python 예제들

예제 #1

0

파일 보기

파일: test_araby.py 프로젝트: youssefsharief/pyarabic

    def test_is_letter(self):
        """Check that each araby predicate accepts every member of its own set.

        Single-character constants (SUKUN, SHADDA, TATWEEL) are checked
        directly; for every character collection the matching ``is_*``
        predicate must return a truthy value for each member.
        """
        self.assertTrue(Araby.is_sukun(Araby.SUKUN))
        self.assertTrue(Araby.is_shadda(Araby.SHADDA))
        self.assertTrue(Araby.is_tatweel(Araby.TATWEEL))

        for archar in Araby.TANWIN:
            self.assertTrue(Araby.is_tanwin(archar))

        for archar in Araby.TASHKEEL:
            self.assertTrue(Araby.is_tashkeel(archar))

        for haraka in Araby.HARAKAT:
            self.assertTrue(Araby.is_haraka(haraka))

        for short_haraka in Araby.SHORTHARAKAT:
            self.assertTrue(Araby.is_shortharaka(short_haraka))

        for ligature in Araby.LIGUATURES:
            self.assertTrue(Araby.is_ligature(ligature))

        for hamza in Araby.HAMZAT:
            self.assertTrue(Araby.is_hamza(hamza))

        for alef in Araby.ALEFAT:
            self.assertTrue(Araby.is_alef(alef))

        for yeh in Araby.YEHLIKE:
            self.assertTrue(Araby.is_yehlike(yeh))

        for waw in Araby.WAWLIKE:
            self.assertTrue(Araby.is_wawlike(waw))

        for teh in Araby.TEHLIKE:
            # The predicate must be *called*: asserting the bare function
            # object is always truthy and would never fail.
            self.assertTrue(Araby.is_teh(teh))

        for small in Araby.SMALL:
            self.assertTrue(Araby.is_small(small))

        for weak in Araby.WEAK:
            self.assertTrue(Araby.is_weak(weak))

        for archar in Araby.MOON:
            self.assertTrue(Araby.is_moon(archar))

        for archar in Araby.SUN:
            self.assertTrue(Araby.is_sun(archar))

예제 #2

0

파일 보기

파일: util.py 프로젝트: devna-dev/durar-backend

 def get_tashkeel_position(page, start, end):
     """Map offsets counted without tashkeel onto raw indices in ``page``.

     ``page`` is walked character by character while counting only the
     characters for which ``is_tashkeel`` is false (the count is zero-based).
     The raw index at which that count reaches ``start`` and the raw index at
     which it reaches ``end`` are returned wrapped in a ``MarkPosition``.

     Returns ``None`` when ``page`` is empty, ``start`` is ``None`` or
     negative, or ``end`` is falsy. Either field of the result stays ``None``
     if the corresponding count is never reached.
     """
     if not page:
         return None
     if start is None or start < 0:
         return None
     if not end:
         return None

     first_pos = None
     last_pos = None
     seen = -1  # zero-based count of non-tashkeel characters seen so far
     for pos, ch in enumerate(page):
         if not is_tashkeel(ch):
             seen += 1
             if seen == start:
                 first_pos = pos
         # Checked for every character, so scanning stops at the first raw
         # index where the running count equals ``end``.
         if seen == end:
             last_pos = pos
             break
     return MarkPosition(first_pos, last_pos)

예제 #3

0

파일 보기

파일: test_unicode.py 프로젝트: sinkingtitanic/pyarabic

    division,
)
import sys
sys.path.append("../")
#~ import  pyarabic.harf as harf
#from   pyarabic import harf
import pyarabic.araby as araby

# Each predicate is paired with the label printed when it matches a character.
# The ligature predicate appears once so its label is not printed twice.
char_classes = (
    (araby.is_sukun, "sukun"),
    (araby.is_haraka, "haraka"),
    (araby.is_shadda, "shadda"),
    (araby.is_tatweel, "tatweel"),
    (araby.is_tashkeel, "tashkeel"),
    (araby.is_tanwin, "tanwin"),
    (araby.is_shortharaka, "short haraka"),
    (araby.is_ligature, "ligature"),
    (araby.is_hamza, "hamza"),
    (araby.is_alef, "alef"),
    (araby.is_yehlike, "yeh"),
    (araby.is_wawlike, "waw"),
    (araby.is_teh, "teh"),
    (araby.is_small, "small"),
    (araby.is_weak, "weak"),
    (araby.is_moon, "moon"),
    (araby.is_sun, "sun"),
)

# One output line per character: the character, its name, every class label
# that applies, then its order value.
for c in araby.arabicrange():
    print(c, '\t', araby.name(c), end=" ")
    print('\t', end=" ")
    for matches, label in char_classes:
        if matches(c):
            print(label, end=" ")
    print(araby.order(c), end=" ")
    print()

예제 #4

0

파일 보기

파일: pyArabicExample.py 프로젝트: youssefsharief/pyarabic

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
sys.path.append('../')
from  pyarabic import araby


# Each predicate is paired with the label printed when it matches a character.
# The ligature predicate appears once so its label is not printed twice.
char_classes = (
    (araby.is_sukun, "sukun"),
    (araby.is_haraka, "haraka"),
    (araby.is_shadda, "shadda"),
    (araby.is_tatweel, "tatweel"),
    (araby.is_tashkeel, "tashkeel"),
    (araby.is_tanwin, "tanwin"),
    (araby.is_shortharaka, "short haraka"),
    (araby.is_ligature, "ligature"),
    (araby.is_hamza, "hamza"),
    (araby.is_alef, "alef"),
    (araby.is_yehlike, "yeh"),
    (araby.is_wawlike, "waw"),
    (araby.is_teh, "teh"),
    (araby.is_small, "small"),
    (araby.is_weak, "weak"),
    (araby.is_moon, "moon"),
    (araby.is_sun, "sun"),
)

# One output line per character: the character, its name, every class label
# that applies, then its order value.
# NOTE(review): a trailing comma after print(...) only suppresses the newline
# for the Python 2 print statement; with the print() function it is a no-op,
# so end=" " is used to keep each character's report on a single line.
for c in araby.arabicrange():
    print(c, '\t', araby.name(c), end=" ")
    print('\t', end=" ")
    for matches, label in char_classes:
        if matches(c):
            print(label, end=" ")
    print(araby.order(c), end=" ")
    print()
word=u"الْعَرَيِيّةُ"

예제 #5

0

파일 보기

파일: test_unicode.py 프로젝트: linuxscout/pyarabic

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
sys.path.append("../")
#~ import  pyarabic.harf as harf
#from   pyarabic import harf
import  pyarabic.araby as araby

for c in araby.arabicrange():
    print c.encode('utf8'),'\t', araby.name(c).encode('utf8'),
    print '\t',
    if araby.is_sukun(c): print "sukun",
    if araby.is_haraka(c): print "haraka",
    if araby.is_shadda(c): print "shadda",
    if araby.is_tatweel(c): print "tatweel",
    if araby.is_tashkeel(c): print "tashkeel",
    if araby.is_tanwin(c): print "tanwin",
    if araby.is_shortharaka(c): print "short haraka",
    if araby.is_ligature(c):print " ligature",
    if araby.is_ligature(c):print 'ligature',
    if araby.is_hamza(c):    print 'hamza',
    if araby.is_alef(c): print 'alef',
    if araby.is_yehlike(c):  print 'yeh',
    if araby.is_wawlike(c):  print 'waw',
    if araby.is_teh(c):  print 'teh',
    if araby.is_small(c):    print 'small',
    if araby.is_weak(c): print 'weak',
    if araby.is_moon(c): print 'moon',
    if araby.is_sun(c):print 'sun',
    print araby.order(c),
    print;

예제 #6

0

파일 보기

파일: processor.py 프로젝트: anesfassih/IRIBLI

def decoupage(word):
    """Split ``word`` into candidate (prefix, base, suffix) combinations.

    Every stored ``Prefixe`` whose unvoweled form starts the diacritic-free
    word, and every stored ``Suffixe`` whose unvoweled form ends it, is
    collected. When the input carries diacritics, an affix is kept only if
    ``araby.vocalizedlike`` accepts its voweled form against the matching
    slice of the word. The empty string stands for "no prefix" / "no suffix".

    Args:
        word: The word to split; it may or may not carry diacritics.

    Returns:
        A list of dicts with keys ``'Base'``, ``'Préfixe'`` and ``'Suffixe'``,
        one per accepted prefix/suffix pairing. ``'Base'`` is what remains of
        ``word`` (diacritics kept) once the affixes are removed; it is empty
        when the word consists of the affix alone.
    """
    word_unvocalized = araby.strip_diacritics(word)
    # Loop-invariant: whether the input word carries diacritics at all.
    word_is_vocalized = araby.is_vocalized(word)

    prefixes, suffixes = [""], [""]
    combinaisons_possibles = []

    for p in Prefixe.objects.all():
        if not word_unvocalized.startswith(p.unvoweled_form):
            continue
        if not word_is_vocalized or araby.vocalizedlike(
                word[:len(p.voweled_form)], p.voweled_form):
            prefixes.append(p)

    for s in Suffixe.objects.all():
        if not word_unvocalized.endswith(s.unvoweled_form):
            continue
        if not word_is_vocalized or araby.vocalizedlike(
                word[-len(s.voweled_form):], s.voweled_form):
            suffixes.append(s)

    for pr in prefixes:
        for sf in suffixes:
            # Validation criteria apply only when both affixes are present.
            if pr != "" and sf != "":
                base_len = (len(word_unvocalized)
                            - len(pr.unvoweled_form)
                            - len(sf.unvoweled_form))
                # Reject bases of 2 letters or fewer, or of more than 9.
                if base_len <= 2 or base_len > 9:
                    continue
                if ((pr.classe[0] == 'N' and sf.classe[0] == 'V')
                        or (pr.classe[0] == 'V' and sf.classe[0] == 'N')
                        or (pr.classe in ['N1', 'N2', 'N3', 'N5'])):
                    continue
            # Reaching this point means the prefix is compatible with the
            # suffix and the size of the base is acceptable.
            base = word
            # Remove the prefix from the base, keeping the base's diacritics.
            if pr:
                for char in pr.unvoweled_form:
                    # Skip anything sitting before the next prefix letter.
                    while char != base[0]:
                        base = base[1:]
                    base = base[1:]
                # Drop diacritics attached to the last prefix letter. The
                # emptiness check avoids an IndexError when the word consists
                # of the prefix alone.
                while base and araby.is_tashkeel(base[0]):
                    base = base[1:]

            # Remove the suffix from the base, keeping the base's diacritics:
            # cut at the last occurrence of each suffix letter, working from
            # the end of the suffix.
            if sf:
                for char in reversed(sf.unvoweled_form):
                    base = base[:base.rindex(char)]

            combinaisons_possibles.append({
                'Base': base,
                'Préfixe': pr,
                'Suffixe': sf
            })

    return combinaisons_possibles