Ejemplo n.º 1
0
def standard_harakat(word):
    """
    Treat harakat on the word before output.

    Every weak letter (ALEF, WAW, YEH) after the first position that has no
    short haraka directly before it and none directly after it (or that ends
    the word) is prefixed with its matching short haraka: FATHA before ALEF,
    DAMMA before WAW, KASRA before YEH. ALEF_MAKSURA and all other
    characters are copied unchanged. The first character is always copied
    as is.

    @param word: given vocalized word.
    @type word: unicode.
    @return: vocalized word with adjusted harakat; an empty word is
        returned unchanged.
    @rtype: unicode.
    """
    # An empty word has no first character to seed the output with.
    if not word:
        return word
    len_word = len(word)
    pieces = [word[0]]
    for k in range(1, len_word):
        char = word[k]
        # Only weak letters that are not at the start of the word are
        # candidates for an added haraka.
        if char in (ALEF, YEH, WAW, ALEF_MAKSURA):
            # The letter is "bare" when neither neighbour is a short haraka
            # (a missing right neighbour counts as "not a haraka").
            bare = not araby.is_shortharaka(word[k - 1]) and (
                k + 1 >= len_word
                or not araby.is_shortharaka(word[k + 1]))
            if bare:
                if char == ALEF:
                    char = FATHA + ALEF
                elif char == WAW:
                    char = DAMMA + WAW
                elif char == YEH:
                    char = KASRA + YEH
        pieces.append(char)
    return u"".join(pieces)
Ejemplo n.º 2
0
def standard_harakat(word):
    """
    Treat harakat on the word before output.

    A weak letter (ALEF, WAW or YEH) that is not the first character and is
    not adjacent to a short haraka on either side gets its homogeneous short
    haraka inserted before it (FATHA+ALEF, DAMMA+WAW, KASRA+YEH).
    ALEF_MAKSURA is recognised as a weak letter but is never modified.

    @param word: given vocalized word.
    @type word: unicode.
    @return: vocalized word with adjusted harakat; words shorter than two
        characters (including the empty word) are returned unchanged.
    @rtype: unicode.
    """
    # Nothing after the first character: nothing to adjust, and indexing
    # word[0] on an empty word would raise IndexError.
    if len(word) < 2:
        return word
    weak_letters = (ALEF, YEH, WAW, ALEF_MAKSURA)
    long_forms = {
        ALEF: FATHA + ALEF,
        WAW: DAMMA + WAW,
        YEH: KASRA + YEH,
    }
    last = len(word) - 1
    out = [word[0]]
    for k, letter in enumerate(word[1:], 1):
        if letter in weak_letters:
            before_is_mark = araby.is_shortharaka(word[k - 1])
            # At the end of the word there is no following mark.
            after_is_mark = k < last and araby.is_shortharaka(word[k + 1])
            if not before_is_mark and not after_is_mark:
                # ALEF_MAKSURA has no entry and therefore stays as it is.
                letter = long_forms.get(letter, letter)
        out.append(letter)
    return u"".join(out)
Ejemplo n.º 3
0
def uniformate_suffix(word):
    """
    Separate the letters and the harakat of the given affix.

    Every SHADDA is first expanded to SUKUN + SHADDA. The word is then
    scanned letter by letter and one mark is recorded per letter:
      - FATHA followed by a non-final ALEF becomes vconst.ALEF_HARAKA and
        the ALEF is dropped from the letters;
      - DAMMA + WAW / KASRA + YEH become vconst.WAW_HARAKA /
        vconst.YEH_HARAKA (and the weak letter is dropped) when the weak
        letter is not itself followed by a short haraka;
      - any other short haraka, or any other mark accepted by
        araby.is_haraka, is recorded as is;
      - a letter with no mark gets vconst.NOT_DEF_HARAKA.
    Short harakat that do not follow a letter are skipped.

    @param word: given affix.
    @type word: unicode.
    @return: (letters, harakat) when both strings have the same length,
        (u"", u"") otherwise.
    @rtype: tuple of unicode.
    """
    word = word.replace(SHADDA, SUKUN + SHADDA)
    len_word = len(word)
    letters = u""
    marks = u""
    i = 0
    while i < len_word:
        if araby.is_shortharaka(word[i]):
            # A short haraka that does not follow a letter is dropped.
            i += 1
            continue
        letters += word[i]
        if i + 1 >= len_word:
            # Last character of the word: no mark can follow.
            marks += vconst.NOT_DEF_HARAKA
            i += 1
            continue
        haraka = word[i + 1]
        if not araby.is_shortharaka(haraka):
            if araby.is_haraka(haraka):
                # Consume the letter AND its mark. The previous code left
                # the index untouched here, so the loop never terminated.
                marks += haraka
                i += 2
            else:
                marks += vconst.NOT_DEF_HARAKA
                i += 1
            continue
        weak = word[i + 2] if i + 2 < len_word else None
        # The weak letter acts as a long vowel only when it does not carry
        # a short haraka of its own.
        weak_is_bare = (i + 3 >= len_word
                        or not araby.is_shortharaka(word[i + 3]))
        if haraka == FATHA and weak == ALEF and i + 3 < len_word:
            # A word-final ALEF is kept as a letter (handled by the
            # fallback branch on this pass and as a letter on the next).
            marks += vconst.ALEF_HARAKA
            i += 3
        elif haraka == DAMMA and weak == WAW and weak_is_bare:
            marks += vconst.WAW_HARAKA
            i += 3
        elif haraka == KASRA and weak == YEH and weak_is_bare:
            marks += vconst.YEH_HARAKA
            i += 3
        else:
            marks += haraka
            i += 2
    if len(letters) == len(marks):
        return (letters, marks)
    return (u"", u"")
Ejemplo n.º 4
0
def uniformate_suffix(word):
    """
    Split an affix into its letters and a parallel string of harakat.

    SHADDA is expanded to SUKUN+SHADDA before scanning. For each letter one
    mark is produced: the mark that follows it, a long-vowel mark
    (vconst.ALEF_HARAKA / WAW_HARAKA / YEH_HARAKA) when the short haraka is
    followed by its matching weak letter, or vconst.NOT_DEF_HARAKA when the
    letter carries no mark. A weak letter folded into a long-vowel mark is
    not copied to the letters.

    @param word: given affix.
    @type word: unicode.
    @return: (letters, harakat), or (u"", u"") when the two strings do not
        end up with the same length.
    @rtype: tuple of unicode.
    """
    word = word.replace(SHADDA, SUKUN+SHADDA)
    # short haraka -> (weak letter that lengthens it, resulting long mark)
    long_vowels = {
        DAMMA: (WAW, vconst.WAW_HARAKA),
        KASRA: (YEH, vconst.YEH_HARAKA),
    }
    size = len(word)
    word_nm = []
    shakl = []
    i = 0
    while i < size:
        if araby.is_shortharaka(word[i]):
            # short haraka without a carrier letter: ignore it
            i += 1
            continue
        word_nm.append(word[i])
        # Default: the letter has no mark and only the letter is consumed.
        mark = vconst.NOT_DEF_HARAKA
        step = 1
        if i + 1 < size:
            nxt = word[i+1]
            if araby.is_shortharaka(nxt):
                mark, step = nxt, 2
                follower = word[i+2] if i+2 < size else None
                if nxt == FATHA:
                    # ALEF lengthens FATHA only when it is not word-final.
                    if follower == ALEF and i+3 < size:
                        mark, step = vconst.ALEF_HARAKA, 3
                elif nxt in long_vowels and follower == long_vowels[nxt][0]:
                    # WAW/YEH lengthen only when they carry no short haraka.
                    if i+3 >= size or not araby.is_shortharaka(word[i+3]):
                        mark, step = long_vowels[nxt][1], 3
            elif araby.is_haraka(nxt):
                # Fix: this branch used to record the mark without moving
                # the index, which made the loop spin forever.
                mark, step = nxt, 2
        shakl.append(mark)
        i += step
    letters = u"".join(word_nm)
    marks = u"".join(shakl)
    if len(letters) == len(marks):
        return (letters, marks)
    return (u"", u"")
Ejemplo n.º 5
0
def normalize(word, wordtype="affix"):
    """
    Convert a word from its orthographic form to a uniform working form.

    Steps, in order:
      1. strip tatweel, unless the word is an affix (affixes keep the
         tatweel because it carries a haraka written on its own);
      2. rewrite a leading ALEF MADDA through normalize_alef_madda;
      3. drop short harakat found at the very beginning of the word;
      4. unify hamza forms with araby.normalize_hamza;
      5. expand the LAM-ALEF ligatures into separate letters.
    Fully vocalized input is assumed, so that the word can be conjugated
    away from spelling rules, which are applied again afterwards.

    @param word: given word.
    @type word: unicode.
    @param wordtype: "affix" when the word is an affix.
    @type wordtype: unicode.
    @return: converted word.
    @rtype: unicode.
    """
    if wordtype != "affix":
        word = araby.strip_tatweel(word)

    if word.startswith(ALEF_MADDA):
        word = normalize_alef_madda(word)

    # Count the short harakat sitting in front of the first real letter.
    start = 0
    for char in word:
        if not araby.is_shortharaka(char):
            break
        start += 1

    text = araby.normalize_hamza(word[start:])

    # Ligatures are expanded one after the other, in this fixed order.
    ligatures = (
        (LAM_ALEF, SIMPLE_LAM_ALEF),
        (LAM_ALEF_HAMZA_ABOVE, SIMPLE_LAM_ALEF_HAMZA_ABOVE),
        (LAM_ALEF_MADDA_ABOVE, SIMPLE_LAM_ALEF_MADDA_ABOVE),
    )
    for ligature, expansion in ligatures:
        text = text.replace(ligature, expansion)
    return text
Ejemplo n.º 6
0
def normalize(word, wordtype = "affix"):
    """
    Normalize the word by unifying hamzat, alef madda and lam-alefs.

    The word is moved from its spelled form to a uniform form: all hamza
    shapes are reduced to one form so the word can be conjugated without
    regard to spelling rules (they are re-applied after conjugation).
    Input words are assumed to be fully vocalized.

    @param word: given word.
    @type word: unicode.
    @param wordtype: "affix" if the word is an affix; any other value
        makes the tatweel be stripped first.
    @type wordtype: unicode.
    @return: converted word.
    @rtype: unicode.
    """
    is_affix = wordtype == "affix"
    if not is_affix:
        # In affixes the tatweel is kept: it carries a haraka written
        # separately.
        word = araby.strip_tatweel(word)

    # A leading ALEF MADDA is rewritten by the dedicated helper.
    if word.startswith(ALEF_MADDA):
        word = normalize_alef_madda(word)

    # Skip harakat at the beginning of the word.
    offset = 0
    total = len(word)
    while offset != total and araby.is_shortharaka(word[offset]):
        offset += 1
    trimmed = word[offset:]

    # One hamza form, then every LAM ALEF ligature as separate letters.
    return (
        araby.normalize_hamza(trimmed)
        .replace(LAM_ALEF, SIMPLE_LAM_ALEF)
        .replace(LAM_ALEF_HAMZA_ABOVE, SIMPLE_LAM_ALEF_HAMZA_ABOVE)
        .replace(LAM_ALEF_MADDA_ABOVE, SIMPLE_LAM_ALEF_MADDA_ABOVE)
    )
Ejemplo n.º 7
0
    def test_is_letter(self):
        """Each member of an Araby character group must be accepted by the
        predicate that corresponds to that group."""

        self.assertTrue(Araby.is_sukun(Araby.SUKUN))
        self.assertTrue(Araby.is_shadda(Araby.SHADDA))
        self.assertTrue(Araby.is_tatweel(Araby.TATWEEL))

        for archar in Araby.TANWIN:
            self.assertTrue(Araby.is_tanwin(archar))

        for archar in Araby.TASHKEEL:
            self.assertTrue(Araby.is_tashkeel(archar))

        for haraka in Araby.HARAKAT:
            self.assertTrue(Araby.is_haraka(haraka))

        for short_haraka in Araby.SHORTHARAKAT:
            self.assertTrue(Araby.is_shortharaka(short_haraka))

        for liguature in Araby.LIGUATURES:
            self.assertTrue(Araby.is_ligature(liguature))

        for hamza in Araby.HAMZAT:
            self.assertTrue(Araby.is_hamza(hamza))

        for alef in Araby.ALEFAT:
            self.assertTrue(Araby.is_alef(alef))

        for yeh in Araby.YEHLIKE:
            self.assertTrue(Araby.is_yehlike(yeh))

        for waw in Araby.WAWLIKE:
            self.assertTrue(Araby.is_wawlike(waw))

        for teh in Araby.TEHLIKE:
            # Call the predicate: asserting the bare function object is
            # always true and tested nothing.
            self.assertTrue(Araby.is_teh(teh))

        for small in Araby.SMALL:
            self.assertTrue(Araby.is_small(small))

        for weak in Araby.WEAK:
            self.assertTrue(Araby.is_weak(weak))

        for archar in Araby.MOON:
            self.assertTrue(Araby.is_moon(archar))

        for archar in Araby.SUN:
            self.assertTrue(Araby.is_sun(archar))
Ejemplo n.º 8
0
def get_haraka_by_name(haraka_name):
    """
    Convert the Arabic name of a haraka into the haraka character.

    Recognised names map to FATHA, DAMMA, KASRA and SUKUN respectively.
    A value that is already a short haraka is returned unchanged.

    @param haraka_name: the arabic name of the haraka, or a short haraka.
    @type haraka_name: unicode
    @return: the haraka character, or False when the name is not
        recognised.
    @rtype: unicode char
    """
    if araby.is_shortharaka(haraka_name):
        return haraka_name
    named_marks = (
        (u"فتحة", FATHA),
        (u"ضمة", DAMMA),
        (u"كسرة", KASRA),
        (u"سكون", SUKUN),
    )
    for label, mark in named_marks:
        if haraka_name == label:
            return mark
    return False
Ejemplo n.º 9
0
def get_haraka_by_name(haraka_name):
    """
    Translate an Arabic haraka name into the corresponding haraka.

    Accepted names are the Arabic words for fatha, damma, kasra and sukun;
    a short haraka passed in directly is handed back as is.

    @param haraka_name: the arabic name of haraka (or a short haraka).
    @type haraka_name: unicode
    @return: the matching haraka, False for an unknown name.
    @rtype: unicode char
    """
    # Unknown names fall through every test below and yield False.
    result = False
    if araby.is_shortharaka(haraka_name):
        result = haraka_name
    elif haraka_name == u"فتحة":
        result = FATHA
    elif haraka_name == u"ضمة":
        result = DAMMA
    elif haraka_name == u"كسرة":
        result = KASRA
    elif haraka_name == u"سكون":
        result = SUKUN
    return result
Ejemplo n.º 10
0
import sys
sys.path.append("../")
#~ import  pyarabic.harf as harf
#from   pyarabic import harf
import pyarabic.araby as araby

# (predicate, label) pairs, listed in the order their labels are printed.
# araby.is_ligature appears twice on purpose: both labels are emitted.
char_classes = (
    (araby.is_sukun, "sukun"),
    (araby.is_haraka, "haraka"),
    (araby.is_shadda, "shadda"),
    (araby.is_tatweel, "tatweel"),
    (araby.is_tashkeel, "tashkeel"),
    (araby.is_tanwin, "tanwin"),
    (araby.is_shortharaka, "short haraka"),
    (araby.is_ligature, " ligature"),
    (araby.is_ligature, 'ligature'),
    (araby.is_hamza, 'hamza'),
    (araby.is_alef, 'alef'),
    (araby.is_yehlike, 'yeh'),
    (araby.is_wawlike, 'waw'),
    (araby.is_teh, 'teh'),
    (araby.is_small, 'small'),
    (araby.is_weak, 'weak'),
    (araby.is_moon, 'moon'),
    (araby.is_sun, 'sun'),
)

# One output line per character: the character, its name, every class it
# belongs to, and finally its order.
for c in araby.arabicrange():
    print(c, '\t', araby.name(c), end=" ")
    print('\t', end=" ")
    for belongs_to, label in char_classes:
        if belongs_to(c):
            print(label, end=" ")
    print(araby.order(c), end=" ")
    print()
word = u"الْعَرَيِيّةُ"
word_list = [
Ejemplo n.º 11
0
# -*- coding: utf-8 -*-
import sys
sys.path.append('../')
from  pyarabic import araby


# Print one row per character: the character, its name, the label of
# every class it belongs to, and its order.
#
# The previous version ended most print(...) calls with a trailing comma,
# a Python 2 idiom that does not suppress the newline when print is a
# function, so every label ended up on its own line. The row is now built
# first and written with a single one-argument print call.
for c in araby.arabicrange():
    fields = [c, '\t', araby.name(c), '\t']
    for belongs_to, label in (
            (araby.is_sukun, "sukun"),
            (araby.is_haraka, "haraka"),
            (araby.is_shadda, "shadda"),
            (araby.is_tatweel, "tatweel"),
            (araby.is_tashkeel, "tashkeel"),
            (araby.is_tanwin, "tanwin"),
            (araby.is_shortharaka, "short haraka"),
            (araby.is_ligature, "ligature"),
            (araby.is_hamza, 'hamza'),
            (araby.is_alef, 'alef'),
            (araby.is_yehlike, 'yeh'),
            (araby.is_wawlike, 'waw'),
            (araby.is_teh, 'teh'),
            (araby.is_small, 'small'),
            (araby.is_weak, 'weak'),
            (araby.is_moon, 'moon'),
            (araby.is_sun, 'sun'),
    ):
        if belongs_to(c):
            fields.append(label)
    # araby.order(c) is converted explicitly so it can be joined as text.
    fields.append(str(araby.order(c)))
    print(" ".join(fields))
word=u"الْعَرَيِيّةُ"
word_list=[
Ejemplo n.º 12
0
def uniformate_verb(word):
    """
    Separate the harakat and the letters of the given verb.

    It returns two strings: the letters and the harakat. The harakat are
    not copied from the input; they are chosen from the number of letters
    left after araby.strip_harakat:
      - 3 letters: FATHA+FATHA+FATHA or FATHA+KASRA+FATHA depending on the
        weak/hamza letters, otherwise FATHA + the second haraka read from
        the word (FATHA when none is found) + FATHA;
      - 4, 5, 6 letters: fixed patterns taken from vconst;
      - any other length: one FATHA per letter.
    A medial ALEF is not kept as a letter: the mark of the previous letter
    is replaced by vconst.ALEF_HARAKA instead. A final ALEF is replaced by
    vconst.ALEF_MAMDUDA or YEH.

    NOTE(review): only the empty string is guarded. A word that strips to
    fewer than two letters appears to index past the end of word_nm below;
    confirm callers never pass such input.

    @param word: given word.
    @type word: unicode.
    @return: (letters, harakat).
    @rtype: tuple of unicode.
    """
    if word == "":
        return ("", "")
    # Normalize ALEF MADDA: HAMZA+HAMZA when the word starts with it,
    # HAMZA+ALEF otherwise. replace() rewrites every occurrence in the word.
    if word.startswith(ALEF_MADDA):
        word = word.replace(ALEF_MADDA, HAMZA+HAMZA)
    else:
        word = word.replace(ALEF_MADDA, HAMZA+ALEF)

    word_nm = araby.strip_harakat(word)
    # The letter count is taken before any hamza normalization.
    length = len(word_nm)
    if len(word_nm) != 3:
        # Hamza forms are used to guess the harakat of the trilateral verb,
        # so for that case they are normalized later (end of the 3-letter
        # branch); every other length is normalized here.
        word_nm = vconst.HAMZAT_PATTERN.sub(HAMZA, word_nm)

    # Hamza forms are used to detect the verb vocalization; they are
    # unified afterwards.
    if length == 3:
        if word_nm[1]in (ALEF, ALEF_HAMZA_ABOVE) or \
         word_nm[2] in (ALEF_MAKSURA, ALEF_HAMZA_ABOVE, ALEF):
            marks = FATHA+FATHA+FATHA
        elif word[1] == YEH_HAMZA or word[2] in (YEH, YEH_HAMZA):
            # NOTE(review): this test indexes `word` (which may still carry
            # harakat), not `word_nm` as the test above does - confirm that
            # this is intended.
            marks = FATHA+KASRA+FATHA
        else:
            # Take the second haraka from the verb itself.
            i = 0
            # ignore harakat at the beginning of the word
            while araby.is_shortharaka(word[i]):  # in HARAKAT:
                i += 1
            # the first letter
            if not araby.is_shortharaka(word[i]):  # not in HARAKAT:
                i += 1
            # the first haraka
            while araby.is_shortharaka(word[i]):  # word[i] in HARAKAT:
                i += 1
            # the second letter
            if not araby.is_shortharaka(word[i]):  # word[i] not in HARAKAT:
                i += 1
            # the second haraka
            if not araby.is_shortharaka(word[i]):  # word[i] not in HARAKAT:
                # Problems were found when conjugating the doubled
                # (geminated) verb in the past tense, so the second haraka
                # is temporarily set to FATHA.
                # ToDo: review this case
                secondharaka = FATHA
            else:
                secondharaka = word[i]
            marks = u''.join([FATHA, secondharaka, FATHA])
        # Hamza forms were needed above to guess the harakat of the
        # trilateral verb; now that the marks are chosen, normalize them.
        word_nm = vconst.HAMZAT_PATTERN.sub(HAMZA, word_nm)

    elif length == 4:
        marks = vconst.UNIFORMATE_MARKS_4
    elif length == 5:
        if word_nm.startswith(TEH):
            marks = vconst.UNIFORMATE_MARKS_5TEH
        else :
            marks = vconst.UNIFORMATE_MARKS_5
    elif length == 6:
        marks = vconst.UNIFORMATE_MARKS_6
    else:
        marks = FATHA*len(word_nm)

    i = 1
    # The first letter and its mark are added automatically.
    new_word = word_nm[0]
    new_harakat = marks[0]
    # Letters between the first and the last.
    while i < length-1:
        if word_nm[i] == ALEF:
            # A medial ALEF is folded into the previous letter's mark and
            # is not copied to the letters.
            new_harakat = new_harakat[:-1]+vconst.ALEF_HARAKA
        else:
            new_harakat += marks[i]
            new_word += word_nm[i]
        i += 1
    # The last letter.
    # A final ALEF becomes ALEF_MAMDUDA for a 3-letter verb whose middle
    # letter is not YEH; in every other case it is turned into YEH (the
    # case of verbs whose final alef turns into yeh instead of waw).
    if word_nm[i] == ALEF:
        if len(word_nm) == 3 and word_nm[1] != YEH:
            new_word += vconst.ALEF_MAMDUDA
        else:
            new_word += YEH
    else:
        new_word += word_nm[i]
    new_harakat += marks[i]
    return (new_word, new_harakat)
Ejemplo n.º 13
0
import sys
sys.path.append("../")
#~ import  pyarabic.harf as harf
#from   pyarabic import harf
import  pyarabic.araby as araby

for c in araby.arabicrange():
    print c.encode('utf8'),'\t', araby.name(c).encode('utf8'),
    print '\t',
    if araby.is_sukun(c): print "sukun",
    if araby.is_haraka(c): print "haraka",
    if araby.is_shadda(c): print "shadda",
    if araby.is_tatweel(c): print "tatweel",
    if araby.is_tashkeel(c): print "tashkeel",
    if araby.is_tanwin(c): print "tanwin",
    if araby.is_shortharaka(c): print "short haraka",
    if araby.is_ligature(c):print " ligature",
    if araby.is_ligature(c):print 'ligature',
    if araby.is_hamza(c):    print 'hamza',
    if araby.is_alef(c): print 'alef',
    if araby.is_yehlike(c):  print 'yeh',
    if araby.is_wawlike(c):  print 'waw',
    if araby.is_teh(c):  print 'teh',
    if araby.is_small(c):    print 'small',
    if araby.is_weak(c): print 'weak',
    if araby.is_moon(c): print 'moon',
    if araby.is_sun(c):print 'sun',
    print araby.order(c),
    print;
word=u"الْعَرَيِيّةُ"
word_list=[
Ejemplo n.º 14
0
def uniformate_verb(word):
    """
    Separate the harakat and the letters of the given verb.

    Returns the letters of the verb and a string of harakat. The harakat
    are selected from the number of letters that remain after
    araby.strip_harakat: a 3-letter verb gets FATHA+FATHA+FATHA,
    FATHA+KASRA+FATHA or FATHA + its own second haraka + FATHA; 4-, 5- and
    6-letter verbs get fixed patterns from vconst; any other length gets
    one FATHA per letter. A medial ALEF is dropped from the letters and
    recorded as vconst.ALEF_HARAKA on the previous letter; a final ALEF is
    replaced by vconst.ALEF_MAMDUDA or YEH.

    NOTE(review): only the empty string is guarded; input that strips to
    fewer than two letters appears to index past the end of word_nm -
    confirm against callers.

    @param word: given word.
    @type word: unicode.
    @return: (letters, harakat).
    @rtype: tuple of unicode.
    """
    if word == "":
        return ("", "")
    # Normalize ALEF MADDA: a leading one goes through
    # normalize_alef_madda; otherwise every ALEF MADDA becomes HAMZA + ALEF.
    if word.startswith(ALEF_MADDA):
        word = normalize_alef_madda(word)
    else:
        word = word.replace(ALEF_MADDA, HAMZA + ALEF)

    word_nm = araby.strip_harakat(word)
    # The letter count is taken before any hamza normalization.
    length = len(word_nm)
    if len(word_nm) != 3:
        # Hamza forms are used to guess the harakat of the trilateral verb,
        # so only non-trilateral verbs are normalized here; the trilateral
        # case is normalized at the end of its own branch.
        word_nm = vconst.HAMZAT_PATTERN.sub(HAMZA, word_nm)

    # Hamza forms are used to detect the verb vocalization; they are
    # unified afterwards.
    if length == 3:
        if word_nm[1]in (ALEF, ALEF_HAMZA_ABOVE) or \
         word_nm[2] in (ALEF_MAKSURA, ALEF_HAMZA_ABOVE, ALEF):
            marks = FATHA + FATHA + FATHA
        elif word[1] == YEH_HAMZA or word[2] in (YEH, YEH_HAMZA):
            # NOTE(review): indexes `word` (possibly still vocalized)
            # rather than `word_nm` - confirm this is intended.
            marks = FATHA + KASRA + FATHA
        else:
            # Read the second haraka from the verb itself.
            i = 0
            # ignore harakat at the beginning of the word
            while araby.is_shortharaka(word[i]):  # in HARAKAT:
                i += 1
            # the first letter
            if not araby.is_shortharaka(word[i]):  #not in HARAKAT:
                i += 1
            # the first haraka
            while araby.is_shortharaka(word[i]):  #word[i] in HARAKAT:
                i += 1
            # the second letter
            if not araby.is_shortharaka(word[i]):  #word[i] not in HARAKAT:
                i += 1
            # the second haraka
            if not araby.is_shortharaka(word[i]):  #word[i] not in HARAKAT:
                # Problems were found when conjugating the doubled
                # (geminated) verb in the past tense, so the second haraka
                # is temporarily set to FATHA.
                #ToDo: review this case
                secondharaka = FATHA
            else:
                secondharaka = word[i]
            marks = u''.join([FATHA, secondharaka, FATHA])
        # The hamza forms have served to guess the harakat of the
        # trilateral verb; normalize them now.
        word_nm = vconst.HAMZAT_PATTERN.sub(HAMZA, word_nm)

    elif length == 4:
        marks = vconst.UNIFORMATE_MARKS_4
    elif length == 5:
        if word_nm.startswith(TEH):
            marks = vconst.UNIFORMATE_MARKS_5TEH
        else:
            marks = vconst.UNIFORMATE_MARKS_5
    elif length == 6:
        marks = vconst.UNIFORMATE_MARKS_6
    else:
        marks = FATHA * len(word_nm)

    i = 1
    # first letter and its mark are added automatically
    new_word = word_nm[0]
    new_harakat = marks[0]
    # between the first and the last
    while i < length - 1:
        if word_nm[i] == ALEF:
            # A medial ALEF is not copied: the previous mark is replaced by
            # the long ALEF mark.
            new_harakat = new_harakat[:-1] + vconst.ALEF_HARAKA
        else:
            new_harakat += marks[i]
            new_word += word_nm[i]
        i += 1
    # the last letter
    # A final ALEF becomes ALEF_MAMDUDA for a 3-letter verb whose middle
    # letter is not YEH; otherwise it is turned into YEH (verbs whose final
    # alef turns into yeh instead of waw).
    if word_nm[i] == ALEF:
        if len(word_nm) == 3 and word_nm[1] != YEH:
            new_word += vconst.ALEF_MAMDUDA
        else:
            new_word += YEH
    else:
        new_word += word_nm[i]
    new_harakat += marks[i]
    return (new_word, new_harakat)
Ejemplo n.º 15
0
import sys
sys.path.append("../")
#~ import  pyarabic.harf as harf
#from   pyarabic import harf
import pyarabic.araby as araby

for c in araby.arabicrange():
    print c.encode('utf8'), '\t', araby.name(c).encode('utf8'),
    print '\t',
    if araby.is_sukun(c): print "sukun",
    if araby.is_haraka(c): print "haraka",
    if araby.is_shadda(c): print "shadda",
    if araby.is_tatweel(c): print "tatweel",
    if araby.is_tashkeel(c): print "tashkeel",
    if araby.is_tanwin(c): print "tanwin",
    if araby.is_shortharaka(c): print "short haraka",
    if araby.is_ligature(c): print " ligature",
    if araby.is_ligature(c): print 'ligature',
    if araby.is_hamza(c): print 'hamza',
    if araby.is_alef(c): print 'alef',
    if araby.is_yehlike(c): print 'yeh',
    if araby.is_wawlike(c): print 'waw',
    if araby.is_teh(c): print 'teh',
    if araby.is_small(c): print 'small',
    if araby.is_weak(c): print 'weak',
    if araby.is_moon(c): print 'moon',
    if araby.is_sun(c): print 'sun',
    print araby.order(c),
    print
word = u"الْعَرَيِيّةُ"
word_list = [