def test_general_letters(self):
    """Check ``Araby.order`` and ``Araby.name`` on a handful of letters."""
    # Expected result of order() for each letter constant.
    expected_orders = (
        (Araby.ALEF, 1),
        (Araby.HAMZA, 29),
        (Araby.YEH, 28),
        (Araby.TEH_MARBUTA, 3),
        (Araby.TEH, 3),
    )
    for letter, position in expected_orders:
        assert Araby.order(letter) == position

    # Expected result of name() for each letter.
    expected_names = (
        (u"أ", u'همزة على الألف'),
        (u"ب", u'باء'),
        (Araby.ALEF_HAMZA_ABOVE, u'همزة على الألف'),
        (u"ة", u'تاء مربوطة'),
    )
    for letter, label in expected_names:
        assert Araby.name(letter) == label
def decode_triverb(self, verb_table, limit):
    """
    Turn raw triliteral-verb rows into a list of verb dictionaries.

    Rows with fewer than four fields are ignored.  For every other row the
    first four fields are used: verb, root, future-type entry and
    transitivity flag.  The future-type entry is converted with
    get_future_type_entree() and stored under "haraka" as the name that
    araby.name() gives it.

    @param verb_table: iterable of indexable rows.
    @param limit: accepted but not used by this method.
    @return: one dictionary per accepted row, with the keys "verb",
        "root", "haraka" and "transitive".
    @rtype: list of dict
    """
    return [
        {
            "verb": row[0],
            "root": row[1],
            "haraka": araby.name(get_future_type_entree(row[2])),
            "transitive": row[3],
        }
        for row in verb_table
        if len(row) >= 4
    ]
def mainly():
    """
    Check generated subject/object nouns against a reference sample file.

    Every non-comment line of the data file is tab separated: the first
    field is the expected noun, the second holds the verb (only the part
    before the first ';' is used).  For each valid verb the noun produced
    by generate_subject() is compared with the expected one and every
    mismatch is written to standard output as a tab-separated UTF-8 row.
    Verbs rejected by valid_verb.is_valid_infinitive_verb() are reported
    with the marker "Invalid_Verb".

    The file is decoded once while reading and the rows are encoded once
    while writing, so the function runs on both Python 2 and Python 3.
    """
    import io
    import sys

    mode = "fa3il"
    data_file = "samples/fa3il-5.csv"
    # To check the object noun instead, use mode = "maf3oul" together with
    # data_file = "samples/maf3oul.csv".
    debug = False  # set to True to report matching rows as well

    # Python 3 exposes the byte stream as sys.stdout.buffer; on Python 2
    # sys.stdout itself accepts encoded bytes.  Flush first so rows written
    # here cannot overtake text that is still pending in the text layer.
    sys.stdout.flush()
    byte_out = getattr(sys.stdout, "buffer", sys.stdout)

    def emit(fields):
        """Write one tab-separated row, UTF-8 encoded, to standard output."""
        byte_out.write((u"\t".join(fields) + u"\n").encode('utf8'))

    with io.open(data_file, encoding='utf8') as f:
        for line in f:
            if line.startswith("#"):
                continue
            liste = line.strip('\n').split("\t")
            if len(liste) < 2:
                continue
            correct = liste[0]
            word = liste[1].split(';')[0]
            transitive = True
            future_type = get_future_type(word)
            # The second argument asks for validation of a vocalized verb.
            if not valid_verb.is_valid_infinitive_verb(word, True):
                emit([word, "", "", correct, "Invalid_Verb"])
                continue

            result = generate_subject(word, transitive, future_type)
            fa3il = result.get("subject", u"")
            maf3oul = result.get("object", u"")
            inf_verb = result.get("infinitive", word)
            if mode == "maf3oul":
                if debug or correct != maf3oul:
                    emit([word, inf_verb,
                          araby.name(future_type) + future_type,
                          maf3oul, correct, str(correct == maf3oul)])
            else:
                if debug or correct != fa3il:
                    emit([word, inf_verb, fa3il, correct,
                          str(correct == fa3il)])
#!/usr/bin/env python # -*- coding: utf-8 -*- from __future__ import ( absolute_import, print_function, unicode_literals, division, ) import sys sys.path.append("../") #~ import pyarabic.harf as harf #from pyarabic import harf import pyarabic.araby as araby for c in araby.arabicrange(): print(c, '\t', araby.name(c), end=" ") print('\t', end=" ") if araby.is_sukun(c): print("sukun", end=" ") if araby.is_haraka(c): print("haraka", end=" ") if araby.is_shadda(c): print("shadda", end=" ") if araby.is_tatweel(c): print("tatweel", end=" ") if araby.is_tashkeel(c): print("tashkeel", end=" ") if araby.is_tanwin(c): print("tanwin", end=" ") if araby.is_shortharaka(c): print("short haraka", end=" ") if araby.is_ligature(c): print(" ligature", end=" ") if araby.is_ligature(c): print('ligature', end=" ") if araby.is_hamza(c): print('hamza', end=" ") if araby.is_alef(c): print('alef', end=" ") if araby.is_yehlike(c): print('yeh', end=" ") if araby.is_wawlike(c): print('waw', end=" ") if araby.is_teh(c): print('teh', end=" ")
#!/usr/bin/env python # -*- coding: utf-8 -*- import sys sys.path.append('../') from pyarabic import araby for c in araby.arabicrange(): print (c,'\t', araby.name(c)) print ('\t') if araby.is_sukun(c): print ("sukun") if araby.is_haraka(c): print ("haraka") if araby.is_shadda(c): print ("shadda") if araby.is_tatweel(c): print ("tatweel") if araby.is_tashkeel(c): print ("tashkeel") if araby.is_tanwin(c): print ("tanwin") if araby.is_shortharaka(c): print ("short haraka"), if araby.is_ligature(c):print (" ligature"), if araby.is_ligature(c):print ('ligature'), if araby.is_hamza(c): print ('hamza'), if araby.is_alef(c): print ('alef'), if araby.is_yehlike(c): print ('yeh'), if araby.is_wawlike(c): print ('waw'), if araby.is_teh(c): print ('teh'), if araby.is_small(c): print ('small'), if araby.is_weak(c): print ('weak'), if araby.is_moon(c): print ('moon'), if araby.is_sun(c):print ('sun'), print (araby.order(c)), print (); word=u"الْعَرَيِيّةُ"
#!/usr/bin/env python # -*- coding: utf-8 -*- import sys sys.path.append("../") #~ import pyarabic.harf as harf #from pyarabic import harf import pyarabic.araby as araby for c in araby.arabicrange(): print c.encode('utf8'),'\t', araby.name(c).encode('utf8'), print '\t', if araby.is_sukun(c): print "sukun", if araby.is_haraka(c): print "haraka", if araby.is_shadda(c): print "shadda", if araby.is_tatweel(c): print "tatweel", if araby.is_tashkeel(c): print "tashkeel", if araby.is_tanwin(c): print "tanwin", if araby.is_shortharaka(c): print "short haraka", if araby.is_ligature(c):print " ligature", if araby.is_ligature(c):print 'ligature', if araby.is_hamza(c): print 'hamza', if araby.is_alef(c): print 'alef', if araby.is_yehlike(c): print 'yeh', if araby.is_wawlike(c): print 'waw', if araby.is_teh(c): print 'teh', if araby.is_small(c): print 'small', if araby.is_weak(c): print 'weak', if araby.is_moon(c): print 'moon', if araby.is_sun(c):print 'sun', print araby.order(c), print;
def standard2(word_nm, harakat):
    """
    Interleave the letters of a word with their harakat and return the
    vocalized word in the standard script.

    @param word_nm: given unvocalized word.
    @type word_nm: unicode.
    @param harakat: given harakat, one per letter.
    @type harakat: unicode.
    @return: the vocalized word; u"*" when the two inputs differ in
        length; u"" when a processing step leaves letters and harakat
        with different lengths.
    @rtype: unicode.
    """
    if len(word_nm) != len(harakat):
        print(word_nm.encode('utf8'), len(word_nm), u"-".join([araby.name(x) for x in harakat]), len(harakat))
        return u"*"

    word_nm, harakat = geminating(word_nm, harakat)
    if len(word_nm) != len(harakat):
        return u""

    # A word must not begin with a sukun: prefix an ALEF.  Its haraka is a
    # damma when the second mark is a damma (or the waw haraka), otherwise
    # a kasra.
    if harakat and harakat.startswith(SUKUN):
        word_nm = ALEF + word_nm
        second_is_damma = (len(harakat) >= 2
                           and harakat[1] in (DAMMA, vconst.WAW_HARAKA))
        harakat = (DAMMA if second_is_damma else KASRA) + harakat
    if len(word_nm) != len(harakat):
        return u""

    # Keep the pre-homogenize values for the diagnostic output below.
    letters_before = word_nm
    marks_before = harakat
    word_nm, harakat = homogenize(word_nm, harakat)
    if len(word_nm) != len(harakat):
        print("len word: ", len(word_nm), word_nm.encode('utf8'))
        print("len harakat: ", len(harakat), repr(harakat))
        print(repr(marks_before), letters_before.encode('utf8'))
        return u""

    word_nm = tahmeez2(word_nm, harakat)

    # Attach to every letter its mark, using the written form of the mark
    # when vconst.WRITTEN_HARAKA defines one.
    # TODO: the case of an alef that originates from a yeh is not handled
    # here; it was made unnecessary by treating the weak final letter of a
    # defective verb as a full letter.
    written = vconst.WRITTEN_HARAKA
    pieces = []
    for position, letter in enumerate(word_nm):
        mark = harakat[position]
        pieces.append(letter + (written[mark] if mark in written else mark))
    word = u"".join(pieces)

    # Apply the standard rewrites (per the original note: hamza on alef
    # followed by a fatha, or by a sukun, becomes alef madda).
    for pattern, replacement in vconst.STANDARD_REPLACEMENT:
        word = word.replace(pattern, replacement)
    return word
def decode_verb_tuple(self, tuple_verb):
    """
    Decode one raw verb record into a dictionary of verb features.

    The record is read by position: field 0 is the model, field 1 the
    root, field 2 the verb and field 3 the verb category code.

    @param tuple_verb: sequence of string fields describing one verb.
    @return: ``{"text": ...}`` carrying an error message when the verb is
        rejected by is_valid_infinitive_verb(); otherwise a dictionary
        with the keys nb_case, word, tenses, nb_trans, suggest,
        triliteral, object_type, verb_cat, future_type, model,
        transitive, root and reflexive_type.
    @rtype: dict
    """
    #abbrevated=False;
    # Positions of the fields inside tuple_verb.
    verb_field_number=2;
    root_field_number=1;
    verb_cat_field_number=3;
    root = self.decode_root(tuple_verb[root_field_number].strip());
    word = tuple_verb[verb_field_number].strip();
    if not is_valid_infinitive_verb(word):
        return {"text":u"#\t'%s'\tis invalid verb "%word};
    else:
        model = tuple_verb[0].strip();
        #print word.encode("utf8")
        # The future type is not read from the record: the placeholder u"-"
        # is converted by get_future_type_entree() and then named.
        future_type = u"-";
        future_type = araby.name(get_future_type_entree(future_type));
        verb_cat = tuple_verb[verb_cat_field_number].strip();
        # decode transitive flag
        #print "'%s'"%transitive;
        transitive = self.decode_transitive(verb_cat);
        #tenses=decode_tenses(verb_cat);
        # decode the tenses
        #init at False
        all=False;
        future=False;
        past=False;
        passive=False;
        imperative=False;
        confirmed=False;
        future_moode=False;
        # transitive to a non-rational (non-human) object
        unthink_trans=False;
        # transitive to a rational object; by default verbs accept one
        think_trans=True;
        # verb of the heart (cognition verb)
        reflexive_trans=False;
        # doubly transitive (takes two objects)
        double_trans=False;
        if verb_cat =="A":# imperative only
            imperative=True;
        elif verb_cat =="D":# passive voice only
            passive=True;
        elif verb_cat =="I":# impersonal
            pass;
        elif verb_cat =="M":# frozen (not conjugated)
            pass;
        elif verb_cat =="P":# past only
            past=True;
        elif verb_cat =="Pu":# past only, absolutely intransitive
            past=True;
        elif verb_cat =="Ry":# present and imperative only
            future=True;
            imperative=True;
        elif verb_cat =="Su":# past and present
            future=True;
            past=True;
            confirmed=True;
            future_moode=True;
        elif verb_cat =="Sv":# past and present
            future=True;
            past=True;
            confirmed=True;
            future_moode=True;
        elif verb_cat =="Sx":# past and present
            future=True;
            past=True;
            confirmed=True;
            future_moode=True;
        elif verb_cat =="u":# absolutely intransitive: no passive, no imperative
            future=True;
            past=True;
            confirmed=True;
            future_moode=True;
        elif verb_cat =="v":# relatively intransitive: all tenses, but passive only with the third person singular
            future=True;
            past=True;
            imperative=True;
            confirmed=True;
            future_moode=True;
            passive=False;
        elif verb_cat in ("x","y", "yw", "ywz", "yy", "yz","yzw"):
            # Original note read "past only", yet this branch switches on
            # every tense flag except passive.
            all=True;
            future=True;
            past=True;
            imperative=True;
            confirmed=True;
            future_moode=True;
            passive=False;
            # NOTE(review): nested here, "Sx" and "Ry" in the tuples below
            # can never match -- confirm the intended nesting of these checks.
            if verb_cat in ("x","Sx"):
                unthink_trans=True;
            elif verb_cat in ("yw", "ywz", "yy", "yz","yzw", "Ry"):
                think_trans=True;
            if verb_cat in ("ywz","yz","yzw"):
                double_trans=True;
            if verb_cat in ("ywz", "yw", "yzw"):
                reflexive_trans=True;
        else:
            # Any other category code.
            all=True;
        nb_trans=0;
        object_type=u"----"
        reflexive_type=u"----";
        # From here on `transitive` holds a label string, not the decoded flag.
        if transitive:
            transitive=u'متعد'
            nb_trans=1;
            if double_trans:
                nb_trans=2;
            if think_trans:
                object_type=u"عاقل";
            # Checked after think_trans, so it overrides that object type.
            if unthink_trans:
                object_type=u"غيرع";
            if reflexive_trans:
                reflexive_type=u"فلبي";
        else:
            transitive=u'لازم'
        ## codify the tense;
        tenses = self.encode_tense(all, past, future, imperative, passive, future_moode, confirmed)
        ## print ('\t'.join([word,future_type,str(transitive)])).encode('utf8');
        nb_case=0;
        suggest=u"";
        triliteral=u"غيرثل"
        if is_triliteral_verb(word):
            triliteral=u"ثلاثي"
        # Disabled: the `if False` guard means this lookup never runs.
        if False:
            # search the future haraka for the triliteral verb
            liste_verb = find_alltriverb(word, araby.FATHA,True);
            # if there are more verb forms, select the first one
            filtered = [item for item in liste_verb if item['verb'] == word]
            if filtered:
                #~ word = liste_verb[0]["verb"]
                haraka = filtered[0]["haraka"]
                future_type = haraka;
                transitive_mark = filtered[0]["transitive"].strip();
                if transitive_mark in (u"م",u"ك"):
                    transitive = u"متعد"
                else:
                    transitive = u"لازم"
            else:
                return {"text":u"#gen_verb_dict: %s error no tri verb"%word}
            if liste_verb:
                if len(liste_verb)>1:
                    # (disabled) suggest = u"Did you mean?<br/>"
                    nb_case = len(liste_verb);
                    # the other forms are suggested
                    for i in range(1,len(liste_verb)):
                        suggested_word = liste_verb[i]["verb"]
                        suggested_haraka = liste_verb[i]["haraka"]
                        suggested_transitive = liste_verb[i]["transitive"]
                        future_form = get_future_form(suggested_word,suggested_haraka);
                        suggest=u"\t".join([suggest,suggested_word,u"["+suggested_haraka+u"]"]);
                else:suggest="-"
        verb_dict = {'nb_case': str(nb_case), 'word': word, 'tenses': tenses, 'nb_trans': nb_trans, 'suggest': suggest, 'triliteral': triliteral, 'object_type': object_type, 'verb_cat': verb_cat, 'future_type': future_type, 'model': model, 'transitive': transitive, 'root': root, 'reflexive_type': reflexive_type, }
        return verb_dict
for key, group in groupby(aa5irHarf)] print(freqOfAa5irHarf) import collections counter = collections.Counter(aa5irHarf) print(counter) # Counter({1: 4, 2: 4, 3: 2, 5: 2, 4: 1}) print(counter.values()) # [4, 4, 2, 1, 2] print(counter.keys()) # [1, 2, 3, 4, 5] print(counter.most_common(3)) # [(1, 4), (2, 4), (3, 2)] print(counter.most_common(1)) kkey = counter.most_common(1) #we should write to file or save it anywhere #and also we should generalize it to all poems for each poet #القافية :آخر ساكن وبدور عالساكن اللي قبله مع الحرف المتحرك اللي قبل الساكن ال ما قبل الاخير print('********** Al Qafiya ************') for line in f: line1 = araby.strip_tatweel(line) letters, hrkat = araby.separate(line1) #print(letters.encode('utf8')) for m in hrkat: #لازم نعمل تعديلات if not araby.is_tatweel(m): print(araby.name(m)) print(''.join(m)) #Most Common Words بنعملهم بكل قصائد الشاعر
#!/usr/bin/env python # -*- coding: utf-8 -*- import sys sys.path.append("../") #~ import pyarabic.harf as harf #from pyarabic import harf import pyarabic.araby as araby for c in araby.arabicrange(): print c.encode('utf8'), '\t', araby.name(c).encode('utf8'), print '\t', if araby.is_sukun(c): print "sukun", if araby.is_haraka(c): print "haraka", if araby.is_shadda(c): print "shadda", if araby.is_tatweel(c): print "tatweel", if araby.is_tashkeel(c): print "tashkeel", if araby.is_tanwin(c): print "tanwin", if araby.is_shortharaka(c): print "short haraka", if araby.is_ligature(c): print " ligature", if araby.is_ligature(c): print 'ligature', if araby.is_hamza(c): print 'hamza', if araby.is_alef(c): print 'alef', if araby.is_yehlike(c): print 'yeh', if araby.is_wawlike(c): print 'waw', if araby.is_teh(c): print 'teh', if araby.is_small(c): print 'small', if araby.is_weak(c): print 'weak', if araby.is_moon(c): print 'moon', if araby.is_sun(c): print 'sun', print araby.order(c), print