def test_is_letter(self):
    """Check every Araby character-class predicate against its constant.

    Each predicate must return a truthy value for the single character, or
    for every member of the collection, that Araby exposes for that class.
    """
    # Single-character classes.
    self.assertTrue(Araby.is_sukun(Araby.SUKUN))
    self.assertTrue(Araby.is_shadda(Araby.SHADDA))
    self.assertTrue(Araby.is_tatweel(Araby.TATWEEL))

    # Collection-valued classes: every member must satisfy its predicate.
    for archar in Araby.TANWIN:
        self.assertTrue(Araby.is_tanwin(archar))
    for archar in Araby.TASHKEEL:
        self.assertTrue(Araby.is_tashkeel(archar))
    for haraka in Araby.HARAKAT:
        self.assertTrue(Araby.is_haraka(haraka))
    for short_haraka in Araby.SHORTHARAKAT:
        self.assertTrue(Araby.is_shortharaka(short_haraka))
    for liguature in Araby.LIGUATURES:
        self.assertTrue(Araby.is_ligature(liguature))
    for hamza in Araby.HAMZAT:
        self.assertTrue(Araby.is_hamza(hamza))
    for alef in Araby.ALEFAT:
        self.assertTrue(Araby.is_alef(alef))
    for yeh in Araby.YEHLIKE:
        self.assertTrue(Araby.is_yehlike(yeh))
    for waw in Araby.WAWLIKE:
        self.assertTrue(Araby.is_wawlike(waw))
    for teh in Araby.TEHLIKE:
        # The predicate has to be *called*: a bare function object is always
        # truthy, so asserting on it would pass without testing anything.
        self.assertTrue(Araby.is_teh(teh))
    for small in Araby.SMALL:
        self.assertTrue(Araby.is_small(small))
    for weak in Araby.WEAK:
        self.assertTrue(Araby.is_weak(weak))
    for archar in Araby.MOON:
        self.assertTrue(Araby.is_moon(archar))
    for archar in Araby.SUN:
        self.assertTrue(Araby.is_sun(archar))
def get_tashkeel_position(page, start, end):
    """Map positions counted without tashkeel onto indices of ``page``.

    Characters of ``page`` for which ``is_tashkeel`` is false are counted
    from zero. ``start`` and ``end`` are positions in that count; the result
    carries the index in ``page`` of the counted character at each position.

    Args:
        page: Text to scan, possibly containing tashkeel marks.
        start: Zero-based position in the tashkeel-free count.
        end: Position in the tashkeel-free count; scanning stops there.

    Returns:
        ``None`` when ``page`` is empty, ``start`` is ``None`` or negative,
        or ``end`` is falsy (``None`` or ``0``). Otherwise
        ``MarkPosition(tashkeel_start, tashkeel_end)``, where either field is
        ``None`` if ``page`` holds too few counted characters to reach it.
    """
    if not page or start is None or start < 0 or not end:
        return None

    tashkeel_start = None
    tashkeel_end = None
    counted = -1  # position of the latest counted character; -1 = none yet

    for index, char in enumerate(page):
        if is_tashkeel(char):
            # Marks take up an index in ``page`` but are not counted.
            continue
        counted += 1
        # Only a counted character can move the count onto ``start`` or
        # ``end``, so both checks live on this path. This keeps
        # ``tashkeel_start`` on the counted character itself rather than on
        # any marks that follow it.
        if counted == start:
            tashkeel_start = index
        if counted == end:
            tashkeel_end = index
            break

    return MarkPosition(tashkeel_start, tashkeel_end)
division, ) import sys sys.path.append("../") #~ import pyarabic.harf as harf #from pyarabic import harf import pyarabic.araby as araby for c in araby.arabicrange(): print(c, '\t', araby.name(c), end=" ") print('\t', end=" ") if araby.is_sukun(c): print("sukun", end=" ") if araby.is_haraka(c): print("haraka", end=" ") if araby.is_shadda(c): print("shadda", end=" ") if araby.is_tatweel(c): print("tatweel", end=" ") if araby.is_tashkeel(c): print("tashkeel", end=" ") if araby.is_tanwin(c): print("tanwin", end=" ") if araby.is_shortharaka(c): print("short haraka", end=" ") if araby.is_ligature(c): print(" ligature", end=" ") if araby.is_ligature(c): print('ligature', end=" ") if araby.is_hamza(c): print('hamza', end=" ") if araby.is_alef(c): print('alef', end=" ") if araby.is_yehlike(c): print('yeh', end=" ") if araby.is_wawlike(c): print('waw', end=" ") if araby.is_teh(c): print('teh', end=" ") if araby.is_small(c): print('small', end=" ") if araby.is_weak(c): print('weak', end=" ") if araby.is_moon(c): print('moon', end=" ") if araby.is_sun(c): print('sun', end=" ") print(araby.order(c), end=" ") print()
#!/usr/bin/env python # -*- coding: utf-8 -*- import sys sys.path.append('../') from pyarabic import araby for c in araby.arabicrange(): print (c,'\t', araby.name(c)) print ('\t') if araby.is_sukun(c): print ("sukun") if araby.is_haraka(c): print ("haraka") if araby.is_shadda(c): print ("shadda") if araby.is_tatweel(c): print ("tatweel") if araby.is_tashkeel(c): print ("tashkeel") if araby.is_tanwin(c): print ("tanwin") if araby.is_shortharaka(c): print ("short haraka"), if araby.is_ligature(c):print (" ligature"), if araby.is_ligature(c):print ('ligature'), if araby.is_hamza(c): print ('hamza'), if araby.is_alef(c): print ('alef'), if araby.is_yehlike(c): print ('yeh'), if araby.is_wawlike(c): print ('waw'), if araby.is_teh(c): print ('teh'), if araby.is_small(c): print ('small'), if araby.is_weak(c): print ('weak'), if araby.is_moon(c): print ('moon'), if araby.is_sun(c):print ('sun'), print (araby.order(c)), print (); word=u"الْعَرَيِيّةُ"
#!/usr/bin/env python # -*- coding: utf-8 -*- import sys sys.path.append("../") #~ import pyarabic.harf as harf #from pyarabic import harf import pyarabic.araby as araby for c in araby.arabicrange(): print c.encode('utf8'),'\t', araby.name(c).encode('utf8'), print '\t', if araby.is_sukun(c): print "sukun", if araby.is_haraka(c): print "haraka", if araby.is_shadda(c): print "shadda", if araby.is_tatweel(c): print "tatweel", if araby.is_tashkeel(c): print "tashkeel", if araby.is_tanwin(c): print "tanwin", if araby.is_shortharaka(c): print "short haraka", if araby.is_ligature(c):print " ligature", if araby.is_ligature(c):print 'ligature', if araby.is_hamza(c): print 'hamza', if araby.is_alef(c): print 'alef', if araby.is_yehlike(c): print 'yeh', if araby.is_wawlike(c): print 'waw', if araby.is_teh(c): print 'teh', if araby.is_small(c): print 'small', if araby.is_weak(c): print 'weak', if araby.is_moon(c): print 'moon', if araby.is_sun(c):print 'sun', print araby.order(c), print;
def decoupage(word):
    """Split ``word`` into candidate (prefix, base, suffix) combinations.

    Every ``Prefixe`` whose unvocalized form starts the unvocalized word and
    every ``Suffixe`` whose unvocalized form ends it is collected, together
    with the empty string standing for "no prefix" / "no suffix". When
    ``word`` is vocalized, an affix is kept only if its vocalized form is
    ``araby.vocalizedlike`` the matching slice of ``word``.

    Each prefix/suffix pair is then filtered. The filters apply only when
    both affixes are present:

    * the base must be 3 to 9 unvocalized characters long;
    * the first letter of the two ``classe`` codes must not be 'N' on one
      side and 'V' on the other (presumably nominal vs. verbal -- confirm
      against the model definitions);
    * the prefix ``classe`` must not be one of 'N1', 'N2', 'N3', 'N5'.

    Args:
        word: The word to split, vocalized or not.

    Returns:
        A list of dicts with keys ``'Base'`` (the remaining part of ``word``
        with its diacritics kept), ``'Préfixe'`` and ``'Suffixe'`` (the
        matched model instance, or ``""`` when absent). ``'Base'`` can be
        empty when ``word`` consists solely of a prefix.
    """
    word_unvocalized = araby.strip_diacritics(word)
    # Loop-invariant: evaluated once instead of once per matching affix.
    word_is_vocalized = araby.is_vocalized(word)
    prefixes, suffixes = [""], [""]
    combinaisons_possibles = []

    for p in Prefixe.objects.all():
        if not word_unvocalized.startswith(p.unvoweled_form):
            continue
        # An unvocalized word cannot contradict the affix's vowels, so it is
        # accepted on the consonant match alone.
        if not word_is_vocalized or araby.vocalizedlike(
                word[:len(p.voweled_form)], p.voweled_form):
            prefixes.append(p)

    for s in Suffixe.objects.all():
        if not word_unvocalized.endswith(s.unvoweled_form):
            continue
        if not word_is_vocalized or araby.vocalizedlike(
                word[-len(s.voweled_form):], s.voweled_form):
            suffixes.append(s)

    for pr in prefixes:
        for sf in suffixes:
            # Validation criteria (only when both affixes are present).
            if pr != "" and sf != "":
                base_length = (len(word_unvocalized)
                               - len(pr.unvoweled_form)
                               - len(sf.unvoweled_form))
                if base_length <= 2 or base_length > 9:
                    continue
                if ((pr.classe[0] == 'N' and sf.classe[0] == 'V')
                        or (pr.classe[0] == 'V' and sf.classe[0] == 'N')
                        or pr.classe in ['N1', 'N2', 'N3', 'N5']):
                    continue

            # Reaching this point means the prefix is compatible with the
            # suffix and the base length is acceptable.
            base = word

            # Strip the prefix from the base while keeping the base's own
            # diacritics.
            if pr:
                for char in pr.unvoweled_form:
                    # Skip diacritics sitting between the prefix letters.
                    while char != base[0]:
                        base = base[1:]
                    base = base[1:]
                # Drop the diacritics of the last prefix letter. ``base`` is
                # empty when the word is nothing but the prefix, so guard the
                # index to avoid an IndexError.
                while base and araby.is_tashkeel(base[0]):
                    base = base[1:]

            # Strip the suffix from the base, letter by letter from the end,
            # while keeping the base's own diacritics.
            if sf:
                for char in reversed(sf.unvoweled_form):
                    base = base[:base.rindex(char)]

            combinaisons_possibles.append({
                'Base': base,
                'Préfixe': pr,
                'Suffixe': sf
            })

    return combinaisons_possibles