예제 #1
0
    def test_is_letter(self):
        """Check every Araby ``is_*`` predicate against its own character set.

        Single-character constants (SUKUN, SHADDA, TATWEEL) are tested
        directly; for each character collection, every member must be
        accepted by the matching predicate.
        """

        self.assertTrue(Araby.is_sukun(Araby.SUKUN))
        self.assertTrue(Araby.is_shadda(Araby.SHADDA))
        self.assertTrue(Araby.is_tatweel(Araby.TATWEEL))

        for archar in Araby.TANWIN:
            self.assertTrue(Araby.is_tanwin(archar))

        for archar in Araby.TASHKEEL:
            self.assertTrue(Araby.is_tashkeel(archar))

        for haraka in Araby.HARAKAT:
            self.assertTrue(Araby.is_haraka(haraka))

        for short_haraka in Araby.SHORTHARAKAT:
            self.assertTrue(Araby.is_shortharaka(short_haraka))

        for ligature in Araby.LIGUATURES:
            self.assertTrue(Araby.is_ligature(ligature))

        for hamza in Araby.HAMZAT:
            self.assertTrue(Araby.is_hamza(hamza))

        for alef in Araby.ALEFAT:
            self.assertTrue(Araby.is_alef(alef))

        for yeh in Araby.YEHLIKE:
            self.assertTrue(Araby.is_yehlike(yeh))

        for waw in Araby.WAWLIKE:
            self.assertTrue(Araby.is_wawlike(waw))

        for teh in Araby.TEHLIKE:
            # The predicate must be *called*: asserting the bare function
            # object is always true and would verify nothing.
            self.assertTrue(Araby.is_teh(teh))

        for small in Araby.SMALL:
            self.assertTrue(Araby.is_small(small))

        for weak in Araby.WEAK:
            self.assertTrue(Araby.is_weak(weak))

        for archar in Araby.MOON:
            self.assertTrue(Araby.is_moon(archar))

        for archar in Araby.SUN:
            self.assertTrue(Araby.is_sun(archar))
예제 #2
0
 def get_tashkeel_position(page, start, end):
     """Map positions counted over non-tashkeel characters to indices in ``page``.

     ``start`` and ``end`` are 0-based positions that count only the
     characters of ``page`` for which ``is_tashkeel`` is false.  The result
     is a ``MarkPosition`` holding the index in ``page`` of the character at
     position ``start`` and of the character at position ``end``.  Either
     component stays ``None`` when the scan finishes without reaching that
     position.

     Returns ``None`` when ``page`` is empty, ``start`` is ``None`` or
     negative, or ``end`` is falsy (which includes ``0``).
     """
     if not page or start is None or start < 0 or not end:
         return None
     first_index = None
     last_index = None
     # Position of the most recent non-tashkeel character; -1 means none yet.
     plain_count = -1
     for offset, symbol in enumerate(page):
         if not is_tashkeel(symbol):
             plain_count += 1
             if plain_count == start:
                 first_index = offset
         # Checked for every character, but it can only become true on the
         # non-tashkeel character that has just advanced the counter.
         if plain_count == end:
             last_index = offset
             break
     return MarkPosition(first_index, last_index)
예제 #3
0
    division,
)
import sys
sys.path.append("../")
#~ import  pyarabic.harf as harf
#from   pyarabic import harf
import pyarabic.araby as araby

# Each entry pairs an araby character-class predicate with the label printed
# when the predicate accepts the character.  is_ligature is listed once so
# the ligature label is printed once per row.
CHAR_CLASSES = (
    (araby.is_sukun, "sukun"),
    (araby.is_haraka, "haraka"),
    (araby.is_shadda, "shadda"),
    (araby.is_tatweel, "tatweel"),
    (araby.is_tashkeel, "tashkeel"),
    (araby.is_tanwin, "tanwin"),
    (araby.is_shortharaka, "short haraka"),
    (araby.is_ligature, "ligature"),
    (araby.is_hamza, "hamza"),
    (araby.is_alef, "alef"),
    (araby.is_yehlike, "yeh"),
    (araby.is_wawlike, "waw"),
    (araby.is_teh, "teh"),
    (araby.is_small, "small"),
    (araby.is_weak, "weak"),
    (araby.is_moon, "moon"),
    (araby.is_sun, "sun"),
)

# One output row per character yielded by araby.arabicrange(): the character,
# its araby.name(), the label of every matching class, then araby.order().
for c in araby.arabicrange():
    print(c, '\t', araby.name(c), end=" ")
    print('\t', end=" ")
    for is_member, label in CHAR_CLASSES:
        if is_member(c):
            print(label, end=" ")
    print(araby.order(c), end=" ")
    print()
예제 #4
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
sys.path.append('../')
from  pyarabic import araby


# Each entry pairs an araby character-class predicate with the label printed
# when the predicate accepts the character.  is_ligature is listed once so
# the ligature label is printed once per row.
CHAR_CLASSES = (
    (araby.is_sukun, "sukun"),
    (araby.is_haraka, "haraka"),
    (araby.is_shadda, "shadda"),
    (araby.is_tatweel, "tatweel"),
    (araby.is_tashkeel, "tashkeel"),
    (araby.is_tanwin, "tanwin"),
    (araby.is_shortharaka, "short haraka"),
    (araby.is_ligature, "ligature"),
    (araby.is_hamza, "hamza"),
    (araby.is_alef, "alef"),
    (araby.is_yehlike, "yeh"),
    (araby.is_wawlike, "waw"),
    (araby.is_teh, "teh"),
    (araby.is_small, "small"),
    (araby.is_weak, "weak"),
    (araby.is_moon, "moon"),
    (araby.is_sun, "sun"),
)

# One output row per character yielded by araby.arabicrange(): the character,
# its araby.name(), the label of every matching class, then araby.order().
# In Python 3 a trailing comma after print(...) does not suppress the newline
# (it only builds a discarded tuple), so end=" " is used to keep each row on
# a single line.
for c in araby.arabicrange():
    print(c, '\t', araby.name(c), end=" ")
    print('\t', end=" ")
    for is_member, label in CHAR_CLASSES:
        if is_member(c):
            print(label, end=" ")
    print(araby.order(c), end=" ")
    print()
word=u"الْعَرَيِيّةُ"
예제 #5
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
sys.path.append("../")
#~ import  pyarabic.harf as harf
#from   pyarabic import harf
import  pyarabic.araby as araby

# Python 2 script (print statements): emit one row per character yielded by
# araby.arabicrange().  A row holds the UTF-8 encoded character, its UTF-8
# encoded araby.name(), the label of every araby.is_* predicate that accepts
# the character, and finally araby.order(c).
# The trailing comma after each print statement suppresses the newline, so
# all fields of a row stay on one line.
for c in araby.arabicrange():
    print c.encode('utf8'),'\t', araby.name(c).encode('utf8'),
    print '\t',
    if araby.is_sukun(c): print "sukun",
    if araby.is_haraka(c): print "haraka",
    if araby.is_shadda(c): print "shadda",
    if araby.is_tatweel(c): print "tatweel",
    if araby.is_tashkeel(c): print "tashkeel",
    if araby.is_tanwin(c): print "tanwin",
    if araby.is_shortharaka(c): print "short haraka",
    # NOTE(review): is_ligature is tested twice, so the ligature label is
    # printed twice for a ligature character -- looks like a copy/paste
    # leftover; confirm before removing one of the two lines.
    if araby.is_ligature(c):print " ligature",
    if araby.is_ligature(c):print 'ligature',
    if araby.is_hamza(c):    print 'hamza',
    if araby.is_alef(c): print 'alef',
    if araby.is_yehlike(c):  print 'yeh',
    if araby.is_wawlike(c):  print 'waw',
    if araby.is_teh(c):  print 'teh',
    if araby.is_small(c):    print 'small',
    if araby.is_weak(c): print 'weak',
    if araby.is_moon(c): print 'moon',
    if araby.is_sun(c):print 'sun',
    print araby.order(c),
    # A bare print terminates the row with a newline.
    print;
예제 #6
0
def decoupage(word):
    """Split ``word`` into candidate (prefix, base, suffix) combinations.

    Every ``Prefixe`` whose unvoweled form starts the diacritic-stripped
    word, and every ``Suffixe`` whose unvoweled form ends it, is a candidate
    affix.  When ``word`` is vocalized, an affix is kept only if its voweled
    form is ``araby.vocalizedlike`` the matching slice of ``word``.  The
    empty string stands for "no prefix" / "no suffix" and is always a
    candidate.

    A prefix/suffix pair where both are present is rejected when the
    remaining base would have 2 or fewer, or more than 9, unvocalized
    characters, or when the affix classes are incompatible.

    Args:
        word: the word to split; it may carry diacritics.

    Returns:
        A list of dicts with the keys ``'Base'`` (the word with the affixes
        removed, inner diacritics kept), ``'Préfixe'`` and ``'Suffixe'``
        (the matching object, or ``""`` when there is none).
    """
    word_unvocalized = araby.strip_diacritics(word)
    # Loop-invariant: computed once instead of once per matching affix.
    word_is_vocalized = araby.is_vocalized(word)
    prefixes, suffixes = [""], [""]
    combinaisons_possibles = []

    for p in Prefixe.objects.all():
        if not word_unvocalized.startswith(p.unvoweled_form):
            continue
        if (not word_is_vocalized
                or araby.vocalizedlike(word[:len(p.voweled_form)],
                                       p.voweled_form)):
            prefixes.append(p)

    for s in Suffixe.objects.all():
        if not word_unvocalized.endswith(s.unvoweled_form):
            continue
        if (not word_is_vocalized
                or araby.vocalizedlike(word[-len(s.voweled_form):],
                                       s.voweled_form)):
            suffixes.append(s)

    for pr in prefixes:
        for sf in suffixes:
            # Validation criteria (only applied when both affixes exist).
            if pr != "" and sf != "":
                base_length = (len(word_unvocalized)
                               - len(pr.unvoweled_form)
                               - len(sf.unvoweled_form))
                if base_length <= 2 or base_length > 9:
                    continue
                if ((pr.classe[0] == 'N' and sf.classe[0] == 'V')
                        or (pr.classe[0] == 'V' and sf.classe[0] == 'N')
                        or (pr.classe in ['N1', 'N2', 'N3', 'N5'])):
                    continue
            # Reaching this point means the prefix is compatible with the
            # suffix and the size of the base is acceptable.
            base = word
            # Remove the prefix from the base while keeping the tashkeel of
            # the remaining letters.
            if pr:
                for char in pr.unvoweled_form:
                    # Skip whatever precedes the next prefix letter
                    # (diacritics), then drop the letter itself.
                    while base and char != base[0]:
                        base = base[1:]
                    base = base[1:]
                # Drop the diacritics attached to the last prefix letter.
                # The emptiness guard matters when the word is nothing but
                # the prefix: indexing base[0] would raise IndexError.
                while base and araby.is_tashkeel(base[0]):
                    base = base[1:]

            # Remove the suffix from the base while keeping the tashkeel of
            # the remaining letters: cut at the last occurrence of each
            # suffix letter, walking the suffix from its end.
            if sf:
                for char in reversed(sf.unvoweled_form):
                    base = base[:base.rindex(char)]

            combinaisons_possibles.append({
                'Base': base,
                'Préfixe': pr,
                'Suffixe': sf
            })

    return combinaisons_possibles