예제 #1
0
    def test_general_letters(self):
        """Check Araby.order() and Araby.name() on a handful of letters."""
        # (letter, value expected from Araby.order)
        expected_orders = (
            (Araby.ALEF, 1),
            (Araby.HAMZA, 29),
            (Araby.YEH, 28),
            (Araby.TEH_MARBUTA, 3),
            (Araby.TEH, 3),
        )
        for letter, rank in expected_orders:
            assert Araby.order(letter) == rank

        # (letter, value expected from Araby.name)
        expected_names = (
            (u"أ", u'همزة على الألف'),
            (u"ب", u'باء'),
            (Araby.ALEF_HAMZA_ABOVE, u'همزة على الألف'),
            (u"ة", u'تاء مربوطة'),
        )
        for letter, label in expected_names:
            assert Araby.name(letter) == label
예제 #2
0
 def decode_triverb(self, verb_table, limit):
     """Turn the rows of a verb table into a list of verb dictionaries.

     Rows with fewer than four fields are ignored.  ``limit`` is accepted
     but is not used by this method.

     @param verb_table: iterable of indexable rows.
     @param limit: unused.
     @return: one dict per kept row, with the keys "verb", "root",
         "haraka" and "transitive".
     @rtype: list of dict.
     """
     # Field layout of a row: verb, root, conjugation class, transitivity
     # (the original note also lists an id column, which is not read here).
     return [
         {
             "verb": row[0],
             "root": row[1],
             "haraka": araby.name(get_future_type_entree(row[2])),
             "transitive": row[3],
         }
         for row in verb_table
         if len(row) >= 4
     ]
예제 #3
0
def mainly():
    """
    Check generated subject/object nouns against a reference sample file.

    Every line of DATA_FILE that does not start with "#" is split on tabs.
    The first column is the expected form, the second column holds the
    verb (only the text before the first ';' is used).  A row is printed
    when the generated form differs from the expected one, or when the
    verb is rejected by valid_verb.is_valid_infinitive_verb().
    """
    mode = "fa3il"
    DATA_FILE = "samples/fa3il-5.csv"
    # Alternative run: mode = "maf3oul" with DATA_FILE = "samples/maf3oul.csv"
    transitive = True
    # Set to True to print every row, not only the mismatches.
    debug = False
    with open(DATA_FILE) as f:
        # readline() returns an empty string only at end of file.
        for raw_line in iter(f.readline, ""):
            line = raw_line.decode('utf8')
            if line.startswith("#"):
                continue
            liste = line.strip('\n').split("\t")
            if len(liste) < 2:
                continue

            correct = liste[0]
            word = liste[1].split(';')[0]
            future_type = get_future_type(word)
            # second argument True: the verb is vocalized
            if not valid_verb.is_valid_infinitive_verb(word, True):
                row = [word, "", "", correct, "Invalid_Verb"]
                print (u"\t".join(row)).encode('utf8')
                continue

            result = generate_subject(word, transitive, future_type)
            fa3il = result.get("subject", u"")
            maf3oul = result.get("object", u"")
            inf_verb = result.get("infinitive", word)
            if mode == "maf3oul":
                matches = correct == maf3oul
                if debug or not matches:
                    row = [word, inf_verb,
                           araby.name(future_type) + future_type,
                           maf3oul, correct, str(matches)]
                    print (u"\t".join(row)).encode('utf8')
            else:
                matches = correct == fa3il
                if debug or not matches:
                    row = [word, inf_verb, fa3il, correct, str(matches)]
                    print (u"\t".join(row)).encode('utf8')
예제 #4
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import (
    absolute_import,
    print_function,
    unicode_literals,
    division,
)
import sys
sys.path.append("../")
#~ import  pyarabic.harf as harf
#from   pyarabic import harf
import pyarabic.araby as araby

# For every item yielded by araby.arabicrange(), print the item, its name and
# the label of each classification predicate it satisfies.
#
# (predicate, label) pairs, applied in this order.  The ligature predicate is
# listed once: the script used to test it twice in a row and therefore
# printed the label twice.
_CHAR_CHECKS = (
    (araby.is_sukun, "sukun"),
    (araby.is_haraka, "haraka"),
    (araby.is_shadda, "shadda"),
    (araby.is_tatweel, "tatweel"),
    (araby.is_tashkeel, "tashkeel"),
    (araby.is_tanwin, "tanwin"),
    (araby.is_shortharaka, "short haraka"),
    (araby.is_ligature, "ligature"),
    (araby.is_hamza, "hamza"),
    (araby.is_alef, "alef"),
    (araby.is_yehlike, "yeh"),
    (araby.is_wawlike, "waw"),
    (araby.is_teh, "teh"),
)

for c in araby.arabicrange():
    # end=" " keeps everything printed for one character on the same line.
    print(c, '\t', araby.name(c), end=" ")
    print('\t', end=" ")
    for is_member, label in _CHAR_CHECKS:
        if is_member(c):
            print(label, end=" ")
    # NOTE(review): nothing in the visible part of the script ends the row
    # with a newline - confirm whether the loop continues past this point.
예제 #5
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
sys.path.append('../')
from  pyarabic import araby


# For every item yielded by araby.arabicrange(), print one row holding the
# item, its name, the label of each classification predicate it satisfies
# and its order.
#
# (predicate, label) pairs, applied in this order.  The ligature predicate is
# listed once: the script used to test it twice in a row.
_CHAR_CHECKS = (
    (araby.is_sukun, "sukun"),
    (araby.is_haraka, "haraka"),
    (araby.is_shadda, "shadda"),
    (araby.is_tatweel, "tatweel"),
    (araby.is_tashkeel, "tashkeel"),
    (araby.is_tanwin, "tanwin"),
    (araby.is_shortharaka, "short haraka"),
    (araby.is_ligature, "ligature"),
    (araby.is_hamza, "hamza"),
    (araby.is_alef, "alef"),
    (araby.is_yehlike, "yeh"),
    (araby.is_wawlike, "waw"),
    (araby.is_teh, "teh"),
    (araby.is_small, "small"),
    (araby.is_weak, "weak"),
    (araby.is_moon, "moon"),
    (araby.is_sun, "sun"),
)

for c in araby.arabicrange():
    # A trailing comma after print(...) does not suppress the newline when
    # print is a function; end=" " is what keeps the row on a single line.
    print(c, '\t', araby.name(c), end=" ")
    print('\t', end=" ")
    for is_member, label in _CHAR_CHECKS:
        if is_member(c):
            print(label, end=" ")
    # The order is the last field; the default newline ends the row.
    print(araby.order(c))
# Sample vocalized word; not used in the visible part of the script.
word=u"الْعَرَيِيّةُ"
예제 #6
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
sys.path.append("../")
#~ import  pyarabic.harf as harf
#from   pyarabic import harf
import  pyarabic.araby as araby

for c in araby.arabicrange():
    print c.encode('utf8'),'\t', araby.name(c).encode('utf8'),
    print '\t',
    if araby.is_sukun(c): print "sukun",
    if araby.is_haraka(c): print "haraka",
    if araby.is_shadda(c): print "shadda",
    if araby.is_tatweel(c): print "tatweel",
    if araby.is_tashkeel(c): print "tashkeel",
    if araby.is_tanwin(c): print "tanwin",
    if araby.is_shortharaka(c): print "short haraka",
    if araby.is_ligature(c):print " ligature",
    if araby.is_ligature(c):print 'ligature',
    if araby.is_hamza(c):    print 'hamza',
    if araby.is_alef(c): print 'alef',
    if araby.is_yehlike(c):  print 'yeh',
    if araby.is_wawlike(c):  print 'waw',
    if araby.is_teh(c):  print 'teh',
    if araby.is_small(c):    print 'small',
    if araby.is_weak(c): print 'weak',
    if araby.is_moon(c): print 'moon',
    if araby.is_sun(c):print 'sun',
    print araby.order(c),
    print;
예제 #7
0
def standard2(word_nm, harakat):
    """ Combine the letters of a word with its harakat
     in the standard script.

    The letters and the harakat are matched position by position, so both
    arguments must have the same length.

    @param word_nm: given unvocalized word.
    @type word_nm: unicode.
    @param harakat: given harakat.
    @type harakat: unicode.
    @return: vocalized word; u"*" when the arguments differ in length on
        entry, u"" when they get out of step after an intermediate step.
    @rtype: unicode.
    """
    if len(word_nm) != len(harakat):
        print(word_nm.encode('utf8'), len(word_nm),
              u"-".join([araby.name(x) for x in harakat]), len(harakat))
        return u"*"

    word_nm, harakat = geminating(word_nm, harakat)
    if len(word_nm) != len(harakat):
        return u""

    # A word must not begin with a sukun: an alef is prepended.  Its haraka
    # is a damma when the second haraka is a damma (or a waw haraka),
    # otherwise it is a kasra.
    if harakat and harakat.startswith(SUKUN):
        word_nm = ALEF + word_nm
        second_is_damma = (len(harakat) >= 2
                           and harakat[1] in (DAMMA, vconst.WAW_HARAKA))
        harakat = (DAMMA if second_is_damma else KASRA) + harakat

    if len(word_nm) != len(harakat):
        return u""

    # Kept only for the diagnostic output below.
    word_before, harakat_before = word_nm, harakat
    word_nm, harakat = homogenize(word_nm, harakat)
    if len(word_nm) != len(harakat):
        print("len word: ", len(word_nm), word_nm.encode('utf8'))
        print("len harakat: ", len(harakat), repr(harakat))
        print(repr(harakat_before), word_before.encode('utf8'))
        return u""
    word_nm = tahmeez2(word_nm, harakat)

    # TODO (from the original notes): the case of an alef that originates
    # from a yeh is not handled here; it was made unnecessary by turning the
    # weak letter of a defective verb into a full letter.
    pieces = []
    for index, letter in enumerate(word_nm):
        mark = harakat[index]
        # Some harakat are written with a different sequence than the one
        # used internally.
        if mark in vconst.WRITTEN_HARAKA:
            mark = vconst.WRITTEN_HARAKA[mark]
        pieces.append(letter + mark)
    word = u"".join(pieces)

    # Final spelling fixes: hamza on alef followed by a fatha, and hamza on
    # alef followed by a sukun, are turned into alef madda.
    for (pat, rep) in vconst.STANDARD_REPLACEMENT:
        word = word.replace(pat, rep)

    return word
예제 #8
0
    def decode_verb_tuple(self, tuple_verb):
        """
        Decode one verb record into a dictionary of verb features.

        The record is read by position: item 0 is the model, item 1 the
        root, item 2 the verb and item 3 the verb category code.  strip()
        is called on each of these items.

        @param tuple_verb: indexable verb record with at least four items.
        @return: {"text": message} when is_valid_infinitive_verb() rejects
            the verb; otherwise a dict with the keys 'nb_case', 'word',
            'tenses', 'nb_trans', 'suggest', 'triliteral', 'object_type',
            'verb_cat', 'future_type', 'model', 'transitive', 'root' and
            'reflexive_type'.
        @rtype: dict.
        """
        root_field_number = 1
        verb_field_number = 2
        verb_cat_field_number = 3
        root = self.decode_root(tuple_verb[root_field_number].strip())
        word = tuple_verb[verb_field_number].strip()

        if not is_valid_infinitive_verb(word):
            return {"text": u"#\t'%s'\tis invalid verb " % word}

        model = tuple_verb[0].strip()
        # The future mark is not read from the record: u"-" is always decoded.
        future_type = araby.name(get_future_type_entree(u"-"))
        verb_cat = tuple_verb[verb_cat_field_number].strip()
        transitive = self.decode_transitive(verb_cat)

        # Tense flags switched on by each category code; every flag that is
        # not listed stays False.  Unknown codes only switch on "all".
        past_future = ("past", "future", "confirmed", "future_moode")
        every_tense = ("all", "imperative") + past_future
        flags_by_cat = {
            "A": ("imperative",),            # imperative only
            "D": ("passive",),               # passive voice only
            "I": (),                         # impersonal
            "M": (),                         # frozen verb
            "P": ("past",),                  # past only
            "Pu": ("past",),                 # past only, fully intransitive
            "Ry": ("future", "imperative"),  # present and imperative only
            "Su": past_future,               # past and present
            "Sv": past_future,               # past and present
            "Sx": past_future,               # past and present
            "u": past_future,                # fully intransitive: no
                                             # passive, no imperative
            # relatively intransitive: all tenses; per the original note the
            # passive only goes with the third person singular
            "v": past_future + ("imperative",),
            "x": every_tense,
            "y": every_tense,
            "yw": every_tense,
            "ywz": every_tense,
            "yy": every_tense,
            "yz": every_tense,
            "yzw": every_tense,
        }
        enabled = flags_by_cat.get(verb_cat, ("all",))
        # Named all_tenses so that the builtin all() is not shadowed.
        all_tenses = "all" in enabled
        past = "past" in enabled
        future = "future" in enabled
        imperative = "imperative" in enabled
        passive = "passive" in enabled
        future_moode = "future_moode" in enabled
        confirmed = "confirmed" in enabled

        # Kind of object taken by the verb.  Verbs accept a rational object
        # by default; only "x" marks a non-rational object.
        # NOTE(review): the original code also tested "Sx" (non-rational
        # object) and "Ry" (rational object) in places those codes could
        # never reach; the effective behaviour is kept - confirm the intent.
        unthink_trans = verb_cat == "x"
        # takes two objects
        double_trans = verb_cat in ("ywz", "yz", "yzw")
        # verb of the heart
        reflexive_trans = verb_cat in ("ywz", "yw", "yzw")

        nb_trans = 0
        object_type = u"----"
        reflexive_type = u"----"
        if transitive:
            transitive = u'متعد'
            nb_trans = 2 if double_trans else 1
            object_type = u"غيرع" if unthink_trans else u"عاقل"
            if reflexive_trans:
                # NOTE(review): this label may be a typo for u"قلبي"; it is
                # emitted as data, so confirm with consumers before changing.
                reflexive_type = u"فلبي"
        else:
            transitive = u'لازم'

        # codify the tenses
        tenses = self.encode_tense(all_tenses, past, future, imperative,
                                   passive, future_moode, confirmed)

        triliteral = u"غيرثل"
        if is_triliteral_verb(word):
            triliteral = u"ثلاثي"

        # The lookup of alternative triliteral forms was permanently
        # disabled (guarded by "if False:"), so no case is counted and no
        # suggestion is produced.
        nb_case = 0
        suggest = u""
        verb_dict = {
            'nb_case': str(nb_case),
            'word': word,
            'tenses': tenses,
            'nb_trans': nb_trans,
            'suggest': suggest,
            'triliteral': triliteral,
            'object_type': object_type,
            'verb_cat': verb_cat,
            'future_type': future_type,
            'model': model,
            'transitive': transitive,
            'root': root,
            'reflexive_type': reflexive_type,
        }
        return verb_dict
예제 #9
0
                   for key, group in groupby(aa5irHarf)]
print(freqOfAa5irHarf)
import collections
# Count how often each value occurs in aa5irHarf.
counter = collections.Counter(aa5irHarf)
print(counter)
# illustrative output shape: Counter({1: 4, 2: 4, 3: 2, 5: 2, 4: 1})
print(counter.values())
# illustrative output shape: [4, 4, 2, 1, 2]
print(counter.keys())
# illustrative output shape: [1, 2, 3, 4, 5]
print(counter.most_common(3))
# illustrative output shape: [(1, 4), (2, 4), (3, 2)]
print(counter.most_common(1))
# Most frequent entry, as a one-item list of (value, count).
kkey = counter.most_common(1)
# TODO: write the result to a file or store it somewhere.
# TODO: generalize this to all the poems of each poet.

# The qafiya (rhyme): take the last letter bearing a sukun, go back to the
# sukun-bearing letter before it, and include the vowelled letter that
# precedes that penultimate sukun-bearing letter.
print('********** Al Qafiya ************')
for line in f:
    # Drop tatweel, then split the line into its letters and its harakat.
    line1 = araby.strip_tatweel(line)
    letters, hrkat = araby.separate(line1)
    #print(letters.encode('utf8'))
    for m in hrkat:
        # TODO: this part still needs adjustments.
        if not araby.is_tatweel(m):
            # Print the name of the mark, then the mark itself.
            print(araby.name(m))
            print(''.join(m))

# Most common words: to be computed over all of the poet's poems.
예제 #10
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
sys.path.append("../")
#~ import  pyarabic.harf as harf
#from   pyarabic import harf
import pyarabic.araby as araby

for c in araby.arabicrange():
    print c.encode('utf8'), '\t', araby.name(c).encode('utf8'),
    print '\t',
    if araby.is_sukun(c): print "sukun",
    if araby.is_haraka(c): print "haraka",
    if araby.is_shadda(c): print "shadda",
    if araby.is_tatweel(c): print "tatweel",
    if araby.is_tashkeel(c): print "tashkeel",
    if araby.is_tanwin(c): print "tanwin",
    if araby.is_shortharaka(c): print "short haraka",
    if araby.is_ligature(c): print " ligature",
    if araby.is_ligature(c): print 'ligature',
    if araby.is_hamza(c): print 'hamza',
    if araby.is_alef(c): print 'alef',
    if araby.is_yehlike(c): print 'yeh',
    if araby.is_wawlike(c): print 'waw',
    if araby.is_teh(c): print 'teh',
    if araby.is_small(c): print 'small',
    if araby.is_weak(c): print 'weak',
    if araby.is_moon(c): print 'moon',
    if araby.is_sun(c): print 'sun',
    print araby.order(c),
    print