def check_partial_vocalized(word_vocalised, resulted_data):
    """
    If the entered word is fully or partially vocalized, keep only the
    analyses whose vocalization is compatible with the input word.
    @param word_vocalised: the input word.
    @type word_vocalised: unicode.
    @param resulted_data: the matches found in the dictionary.
    @type resulted_data: list of dict.
    @return: list of dictionaries of analyzed words with tags.
    @rtype: list.
    """
    # An unvocalized input gives nothing to filter against.
    if not araby.is_vocalized(word_vocalised):
        return resulted_data
    # Keep only the entries whose vocalized form is compatible with the
    # (partially) vocalized input, and tag them as such.
    kept = []
    for entry in resulted_data:
        if 'vocalized' in entry and araby.vocalizedlike(
                word_vocalised, entry['vocalized']):
            entry['tags'] += ':' + analex_const.partialVocalizedTag
            kept.append(entry)
    return kept
def test_vocalized_similarity(self):
    """Test vocalized_similarity function ?"""
    partly_vocalized = u"ضَربٌ"
    fully_vocalized = u"ضَرْبٌ"
    # The two spellings differ only by an omitted sukun, so they must
    # be considered vocalized-alike and similar (not -2, i.e. not a
    # hard mismatch).
    self.assertTrue(ar.vocalizedlike(partly_vocalized, fully_vocalized))
    self.assertNotEqual(
        ar.vocalized_similarity(partly_vocalized, fully_vocalized), -2)
    self.assertTrue(
        ar.vocalized_similarity(partly_vocalized, fully_vocalized))
def Comparetashkeel(text): import tashkeel.tashkeel as ArabicVocalizer # the entred text is vocalized correctly correct_text=text; text=araby.stripTashkeel(text); vocalizer=ArabicVocalizer.TashkeelClass(); vocalized_text=vocalizer.tashkeel(text); # compare voalized text with a correct text text1=correct_text; text2=vocalized_text; # remove collocations symboles text2=text2.replace("'",""); text2=text2.replace("~",""); #stemmer=tashaphyne.stemming.ArabicLightStemmer() list1=vocalizer.analyzer.tokenize(text1); list2=vocalizer.analyzer.tokenize(text2); print u":".join(list1).encode('utf8'); print u":".join(list2).encode('utf8'); correct=0; incorrect=0; total=len(list1); if len(list1)!=len(list2): print "lists haven't the same length"; else: for i in range(total): if araby.vocalizedlike(list1[i],list2[i]): correct+=1; else: incorrect+=1; result=[vocalized_text,"correct:%0.2f%%"%round(correct*100.00/total,2),"incorrect:%0.2f%%"%round(incorrect*100.00/total,2),total] return result#correct*100/total;
def mot_except(word):
    """Detect whether the given input word is an exceptional word with
    respect to the database.

    @param word: possibly vocalized input word.
    @return: list of matching ExceptionalWord objects.
    """
    combs = []
    for me in ExceptionalWord.objects.filter(
            unvoweled_form=araby.strip_diacritics(word)):
        # FIX: compare against the voweled form of the entry, not the
        # model instance itself (consistent with mot_outil/nom_propre,
        # which pass `.voweled_form` to vocalizedlike).
        if araby.vocalizedlike(word, me.voweled_form):
            combs.append(me)
    return combs
def mot_outil(word):
    """Detect whether the given input word is a tool word ("mot outil")
    with respect to the database.

    Returns a list of dicts: the matching ToolWord plus the prefix and
    suffix of the segmentation it matched under.
    """
    matches = []
    for comb in decoupage(word):
        base = comb['Base']
        candidates = ToolWord.objects.filter(
            unvoweled_form=araby.strip_diacritics(base))
        for tool in candidates:
            if not araby.vocalizedlike(base, tool.voweled_form):
                continue
            matches.append({
                'tw_object': tool,
                'Préfixe': comb['Préfixe'],
                'Suffixe': comb['Suffixe'],
            })
    return matches
def nom_propre(word):
    """Detect whether the given input word is a proper noun with respect
    to the database.

    Returns a list of dicts: the matching ProperNoun plus the base,
    prefix and suffix of the segmentation it matched under.
    """
    found = []
    for comb in decoupage(word):
        base = comb['Base']
        candidates = ProperNoun.objects.filter(
            unvoweled_form=araby.strip_diacritics(base))
        for noun in candidates:
            if not araby.vocalizedlike(base, noun.voweled_form):
                continue
            found.append({
                'pn_object': noun,
                'Base': base,
                'Préfixe': comb['Préfixe'],
                'Suffixe': comb['Suffixe'],
            })
    return found
def check_partial_vocalized(word_vocalised, resulted_data):
    """
    If the entered word is fully or partially vocalized, keep only the
    analyses whose vocalization is compatible with the input word.
    @param word_vocalised: the input word.
    @type word_vocalised: unicode.
    @param resulted_data: the matches found in the dictionary.
    @type resulted_data: list of dict.
    @return: list of dictionaries of analyzed words with tags.
    @rtype: list.
    """
    # An unvocalized input gives nothing to filter against.
    if not araby.is_vocalized(word_vocalised):
        return resulted_data
    kept = []
    for item in resulted_data:
        if 'vocalized' not in item:
            continue
        output = item['vocalized']
        is_verb = "Verb" in item['type']
        if araby.vocalizedlike(word_vocalised, output):
            item['tags'] += ':' + analex_const.PARTIAL_VOCALIZED_TAG
            kept.append(item)
        # Special case (meeting of two sukuns in pre-vocalized text):
        # the verb may end with a KASRA instead of the expected SUKUN,
        # so compare the two words without their final mark.
        elif (is_verb and word_vocalised.endswith(araby.KASRA)
              and output.endswith(araby.SUKUN)):
            if araby.vocalizedlike(word_vocalised[:-1], output[:-1]):
                item['tags'] += ':' + analex_const.PARTIAL_VOCALIZED_TAG
                kept.append(item)
    return kept
def Comparetashkeel(text): import tashkeel.tashkeel as ArabicVocalizer # the entred text is vocalized correctly correct_text = text text = araby.stripTashkeel(text) vocalizer = ArabicVocalizer.TashkeelClass() vocalized_text = vocalizer.tashkeel(text) # compare voalized text with a correct text text1 = correct_text text2 = vocalized_text # remove collocations symboles text2 = text2.replace("'", "") text2 = text2.replace("~", "") #stemmer=tashaphyne.stemming.ArabicLightStemmer() list1 = vocalizer.analyzer.tokenize(text1) list2 = vocalizer.analyzer.tokenize(text2) print u":".join(list1).encode('utf8') print u":".join(list2).encode('utf8') correct = 0 incorrect = 0 total = len(list1) if len(list1) != len(list2): print "lists haven't the same length" else: for i in range(total): if araby.vocalizedlike(list1[i], list2[i]): correct += 1 else: incorrect += 1 result = [ vocalized_text, "correct:%0.2f%%" % round(correct * 100.00 / total, 2), "incorrect:%0.2f%%" % round(incorrect * 100.00 / total, 2), total ] return result #correct*100/total;
def check_partial_vocalized(self, word_vocalised, resulted_data):
    """
    If the entered word is fully or partially vocalized, keep only the
    analyses whose vocalization is compatible with the input word.
    @param word_vocalised: the input word.
    @type word_vocalised: unicode.
    @param resulted_data: the matches found in the dictionary.
    @type resulted_data: list of objects with a __dict__ of attributes.
    @return: list of analyzed words with tags.
    @rtype: list.
    """
    # An unvocalized input gives nothing to filter against.
    if not araby.isVocalized(word_vocalised):
        return resulted_data
    kept = []
    for item in resulted_data:
        # Result entries are objects here, so look the fields up
        # through their attribute dictionary.
        attrs = item.__dict__
        if "vocalized" in attrs and araby.vocalizedlike(
                word_vocalised, attrs["vocalized"]):
            attrs["tags"] += ":" + analex_const.partialVocalizedTag
            kept.append(item)
    return kept
def compare_tashkeel(text):
    """
    Compare tashkeel between vocalized text and automatic vocalized text
    """
    import tashkeel.tashkeel as ArabicVocalizer
    # The entered text is assumed to be correctly vocalized; strip its
    # marks and re-vocalize automatically, then diff the two.
    correct_text = text.strip()
    text = araby.strip_tashkeel(text.strip())
    # Cache directory relative to this module — TODO confirm ../tmp/ exists.
    cpath = os.path.join(os.path.dirname(__file__), '../tmp/')
    vocalizer = ArabicVocalizer.TashkeelClass(mycache_path=cpath)
    # Suggestion output: a list of per-word dicts (keys used below:
    # 'chosen', 'semi', 'inflect', 'link', 'rule').
    vocalized_dict = vocalizer.tashkeel_ouput_html_suggest(text)
    text1 = correct_text
    displayed_html = u""
    texts = [text1, ]
    list1 =[]
    for txt in texts:
        list1 += vocalizer.analyzer.tokenize(txt)
    list2 = vocalized_dict
    print u"\t".join(list1).encode('utf8')
    correct = 0
    incorrect = 0
    total = len(list1)
    if len(list1)!= len(list2):
        # Tokenization mismatch between reference and suggestion lists:
        # dump the aligned prefix for debugging and abort the process.
        print "lists haven't the same length", len(list1), len(list2)
        for i in range(min(len(list1), len(list2))):
            print (u"'%s'\t'%s'"%(list1[i], list2[i].get('chosen',''))).encode("utf8")
        sys.exit()
    else:
        for i in range(total):
            wo1 = list1[i]       # reference (correct) word
            wo1_strip = wo1
            wo2 = list2[i]['chosen']
            wo2_strip = list2[i]['semi']  # word without inflection mark
            inflect = list2[i]['inflect']
            link = list2[i]['link']
            rule = list2[i]['rule']
            style = "diff"
            if araby.vocalizedlike(wo1, wo2):
                # Exact (vocalized-like) match: count as correct.
                if wo2 == "\n":
                    wo2 = "<br/>"
                displayed_html += u" <span id='diff' class='%s' original='%s' inflect='%s' link='%s' rule='%s'>%s</span>" % (
                    style, wo1, inflect, link, str(rule), wo2)
                correct += 1
            else:
                incorrect += 1
                # Classify the kind of mismatch for CSS highlighting:
                # only the final (inflection) mark differs -> diff-mark.
                wo1_strip = wo1
                if araby.vocalizedlike(wo1_strip, wo2_strip):
                    style = 'diff-mark'
                else:
                    # Compare the last character of each word: equal
                    # harakas, or a haraka on exactly one side, means the
                    # word body differs -> diff-word; otherwise everything
                    # differs -> diff-all.
                    wm1 = wo1[-1:]
                    wm2 = wo2[-1:]
                    if (araby.is_haraka(wm1) and araby.is_haraka(wm2) and wm1 == wm2) \
                       or (bool(araby.is_haraka(wm1)) ^ bool(araby.is_haraka(wm2))):
                        style = "diff-word"
                    else:
                        style = 'diff-all'
                displayed_html += u" <span id='diff' class='%s' original='%s' inflect='%s' link='%s' rule='%s'>%s</span>" % (
                    style, wo1, inflect, link, str(rule), wo2)
    # NOTE(review): raises ZeroDivisionError when total == 0 — confirm
    # callers never pass empty text.
    per_correct = round(correct*100.00/total, 2)
    per_incorrect = round(incorrect*100.00/total, 2)
    result = [displayed_html, "correct:%0.2f%%, incorrect:%0.2f%%"%(per_correct, per_incorrect)]
    return result#correct*100/total
u"الْعَرَيِيّةُ الفصحى", u"غير مشكول", "Taha", ] word1 = u"" for word in word_list: print(word, '\t', end=" ") if araby.is_vocalized(word): print(' is vocalized', end=" ") if araby.is_vocalizedtext(word): print(' is vocalized text', end=" ") if araby.is_arabicword(word): print(' is valid word', end=" ") else: print("invalid arabic word", end=" ") print(' strip harakat', araby.strip_harakat(word), end=" ") print(' strip tashkeel', araby.strip_tashkeel(word), end=" ") print(' strip tatweel', araby.strip_tatweel(word), end=" ") print(' normalize ligature ', araby.normalize_ligature(word), end=" ") if araby.vocalizedlike(word, word1): print("vocalized_like", end=" ") print() word1 = word if araby.vocalizedlike(u"العربية", u"العرَبية"): print("vocalized_like", end=" ") word = u"الْعَرَيِيّةُ" word_list = [ u"الْعَرَيِيّةُ", u"العربية", u"الْعَرَيِيّةُ الفصحى", u"غير مشكول", "Taha", ] word1 = u"" for word in word_list: print(word, '\t', end=" ")
def test_vocalizedlike(self):
    """Test vocalizedlike function ?"""
    # Spellings differing only by an omitted sukun are alike.
    partly_vocalized = u"ضَربٌ"
    fully_vocalized = u"ضَرْبٌ"
    self.assertTrue(ar.vocalizedlike(partly_vocalized, fully_vocalized))
def decoupage(word):
    """Split the input word into (prefixes, base, suffixes).

    Returns a list of dicts ('Base', 'Préfixe', 'Suffixe') holding every
    syntactically acceptable combination, based on the compatibility of
    the detected prefixes/suffixes and on the size of the base.
    """
    word_unvocalized = araby.strip_diacritics(word)
    # "" stands for "no prefix" / "no suffix" so the bare word is always
    # one of the candidate combinations.
    prefixes, suffixes = [""], [""]
    combinaisons_possibles = []
    # Collect candidate prefixes: the unvoweled word must start with the
    # prefix; for a vocalized word the vowels must also be compatible.
    for p in Prefixe.objects.all():
        if word_unvocalized.startswith(p.unvoweled_form):
            if araby.is_vocalized(word):
                if araby.vocalizedlike(word[:len(p.voweled_form)],
                                       p.voweled_form):
                    prefixes.append(p)
            else:
                prefixes.append(p)
    # Collect candidate suffixes, symmetrically (matching at the end).
    for s in Suffixe.objects.all():
        if word_unvocalized.endswith(s.unvoweled_form):
            if araby.is_vocalized(word):
                if araby.vocalizedlike(word[-len(s.voweled_form):],
                                       s.voweled_form):
                    suffixes.append(s)
            else:
                suffixes.append(s)
    for pr in prefixes:
        for sf in suffixes:
            # Validation criteria (only when both affixes are present):
            # the remaining base must be 3..9 letters long, and the
            # prefix/suffix grammatical classes must be compatible.
            if pr != "" and sf != "":
                if (len(word_unvocalized) - len(pr.unvoweled_form)
                        - len(sf.unvoweled_form)) <= 2 or \
                   (len(word_unvocalized) - len(pr.unvoweled_form)
                        - len(sf.unvoweled_form)) > 9:
                    continue
                if ((pr.classe[0] == 'N' and sf.classe[0] == 'V')
                        or (pr.classe[0] == 'V' and sf.classe[0] == 'N')
                        or (pr.classe in ['N1', 'N2', 'N3', 'N5'])):
                    continue
            # Reaching here means the prefix is compatible with the
            # suffix and the base size is acceptable.
            base = word
            # Strip the prefix from the base while keeping the tashkeel:
            # consume characters until each prefix letter is matched.
            if pr:
                for char in pr.unvoweled_form:
                    while char != base[0]:
                        base = base[1:]
                    base = base[1:]
                # Drop any diacritics left dangling at the front.
                while araby.is_tashkeel(base[0]):
                    base = base[1:]
            # Strip the suffix from the base while keeping the tashkeel:
            # cut at the rightmost occurrence of each suffix letter,
            # scanning the suffix letters from last to first.
            if sf:
                r_sf = [c for c in sf.unvoweled_form]
                r_sf.reverse()
                for char in r_sf:
                    base = base[:base.rindex(char)]
            combinaisons_possibles.append({
                'Base': base,
                'Préfixe': pr,
                'Suffixe': sf
            })
    return combinaisons_possibles
def test_vocalizedlike(self):
    # vocalizedlike(word1, word2)
    # A missing sukun must not break the vocalized-like comparison.
    partly_vocalized = u"ضَربٌ"
    fully_vocalized = u"ضَرْبٌ"
    self.assertTrue(Araby.vocalizedlike(partly_vocalized, fully_vocalized))
from pyarabic.unshape import unshaping_line
import arabic_reshaper
from pyarabic import araby

# Demo script: compare unshaped letter sequences from data.txt against
# the reference lines in right.txt using vocalizedlike.

# FIX: use context managers — the original opened both files without
# ever closing them, leaking one handle per loop iteration.
with open('data.txt', 'r') as f:
    lignes = f.readlines()

print(araby.vocalizedlike('ب ر ي ت'.replace(' ', ''), 'بريت'))

for ligne in lignes:
    # Re-read the reference list each iteration, as the original did,
    # but close the file deterministically.
    with open('right.txt', 'r') as f2:
        rights = f2.readlines()
    print(rights)
    for right in rights:
        if araby.vocalizedlike(unshaping_line(ligne).replace(' ', ''),
                               unshaping_line(right)):
            print(unshaping_line(right).encode('utf8'))
            print('بريت')
def compare_tashkeel(text):
    """
    Compare tashkeel between vocalized text and automatic vocalized text
    """
    import tashkeel.tashkeel as ArabicVocalizer
    # The entered text is assumed to be correctly vocalized; strip its
    # marks and re-vocalize automatically, then diff the two.
    correct_text = text.strip()
    text = araby.strip_tashkeel(text.strip())
    vocalizer = ArabicVocalizer.TashkeelClass()
    # Suggestion output: a list of per-word dicts (keys used below:
    # 'chosen', 'semi', 'inflect', 'link', 'rule').
    vocalized_dict = vocalizer.tashkeel_ouput_html_suggest(text)
    text1 = correct_text
    displayed_html = u""
    texts = [
        text1,
    ]
    list1 = []
    for txt in texts:
        list1 += vocalizer.analyzer.tokenize(txt)
    list2 = vocalized_dict
    print u"\t".join(list1).encode('utf8')
    correct = 0
    incorrect = 0
    total = len(list1)
    if len(list1) != len(list2):
        # Tokenization mismatch between reference and suggestion lists:
        # dump the aligned prefix for debugging and abort the process.
        print "lists haven't the same length", len(list1), len(list2)
        for i in range(min(len(list1), len(list2))):
            print(u"'%s'\t'%s'" % (list1[i], list2[i].get('chosen', ''))).encode("utf8")
        sys.exit()
    else:
        for i in range(total):
            wo1 = list1[i]       # reference (correct) word
            wo1_strip = wo1
            wo2 = list2[i]['chosen']
            wo2_strip = list2[i]['semi']  # word without inflection mark
            inflect = list2[i]['inflect']
            link = list2[i]['link']
            rule = list2[i]['rule']
            style = "diff"
            if araby.vocalizedlike(wo1, wo2):
                # Exact (vocalized-like) match: count as correct.
                if wo2 == "\n":
                    wo2 = "<br/>"
                displayed_html += u" <span id='diff' class='%s' original='%s' inflect='%s' link='%s' rule='%s'>%s</span>" % (
                    style, wo1, inflect, link, str(rule), wo2)
                correct += 1
            else:
                incorrect += 1
                # Classify the kind of mismatch for CSS highlighting:
                # only the final (inflection) mark differs -> diff-mark.
                wo1_strip = wo1
                if araby.vocalizedlike(wo1_strip, wo2_strip):
                    style = 'diff-mark'
                else:
                    # Compare the last character of each word: equal
                    # harakas, or a haraka on exactly one side, means the
                    # word body differs -> diff-word; otherwise everything
                    # differs -> diff-all.
                    wm1 = wo1[-1:]
                    wm2 = wo2[-1:]
                    if (araby.is_haraka(wm1) and araby.is_haraka(wm2) and wm1 == wm2) \
                       or (bool(araby.is_haraka(wm1)) ^ bool(araby.is_haraka(wm2))):
                        style = "diff-word"
                    else:
                        style = 'diff-all'
                displayed_html += u" <span id='diff' class='%s' original='%s' inflect='%s' link='%s' rule='%s'>%s</span>" % (
                    style, wo1, inflect, link, str(rule), wo2)
    # NOTE(review): raises ZeroDivisionError when total == 0 — confirm
    # callers never pass empty text.
    per_correct = round(correct * 100.00 / total, 2)
    per_incorrect = round(incorrect * 100.00 / total, 2)
    result = [
        displayed_html,
        "correct:%0.2f%%, incorrect:%0.2f%%" % (per_correct, per_incorrect)
    ]
    return result  #correct*100/total
"Taha", ] word1=u"" for word in word_list: print word.encode('utf8'),'\t', if araby.is_vocalized(word): print ' is vocalized', ## if araby.isArabicstring(word): print ' iisArabicstring', ## else:print ' invalid arabicstring', if araby.is_vocalizedtext(word): print ' is vocalized text', if araby.is_arabicword(word): print ' is valid word', else: print "invalid arabic word", print ' strip harakat', araby.strip_harakat(word).encode('utf8'), print ' strip tashkeel', araby.strip_tashkeel(word).encode('utf8'), print ' strip tatweel',araby.strip_tatweel(word).encode('utf8'), print ' normalize ligature ', araby.normalize_ligature(word).encode('utf8'), if araby.vocalizedlike(word, word1): print "vocalized_like", print; word1=word; if araby.vocalizedlike(u"العربية",u"العرَبية"): print "vocalized_like", word=u"الْعَرَيِيّةُ" word_list=[ u"الْعَرَيِيّةُ", u"العربية", u"الْعَرَيِيّةُ الفصحى", u"غير مشكول", "Taha", ] word1=u"" for word in word_list: print word.encode('utf8'),'\t', if araby.is_vocalized(word): print ' is vocalized',
"Taha", ] word1="" for word in word_list: print(word,'\t') if araby.is_vocalized(word): print(' is vocalized') ## if araby.isArabicstring(word): print ' iisArabicstring', ## else:print ' invalid arabicstring', if araby.is_vocalizedtext(word): print(' is vocalized text') if araby.is_arabicword(word): print(' is valid word') else: print("invalid arabic word") print(' strip harakat', araby.strip_harakat(word)) print(' strip tashkeel', araby.strip_tashkeel(word)) print(' strip tatweel',araby.strip_tatweel(word)) print(' normalize ligature ', araby.normalize_ligature(word)) if araby.vocalizedlike(word, word1): print("vocalized_like") print(); word1=word; if araby.vocalizedlike("العربية","العرَبية"): print("vocalized_like") word="الْعَرَيِيّةُ" word_list=[ "الْعَرَيِيّةُ", "العربية", "الْعَرَيِيّةُ الفصحى", "غير مشكول", "Taha", ] word1="" for word in word_list: print(word,'\t') if araby.is_vocalized(word): print(' is vocalized')
] word1 = u"" for word in word_list: print word.encode('utf8'), '\t', if araby.is_vocalized(word): print ' is vocalized', ## if araby.isArabicstring(word): print ' iisArabicstring', ## else:print ' invalid arabicstring', if araby.is_vocalizedtext(word): print ' is vocalized text', if araby.is_arabicword(word): print ' is valid word', else: print "invalid arabic word", print ' strip harakat', araby.strip_harakat(word).encode('utf8'), print ' strip tashkeel', araby.strip_tashkeel(word).encode('utf8'), print ' strip tatweel', araby.strip_tatweel(word).encode('utf8'), print ' normalize ligature ', araby.normalize_ligature(word).encode( 'utf8'), if araby.vocalizedlike(word, word1): print "vocalized_like", print word1 = word if araby.vocalizedlike(u"العربية", u"العرَبية"): print "vocalized_like", word = u"الْعَرَيِيّةُ" word_list = [ u"الْعَرَيِيّةُ", u"العربية", u"الْعَرَيِيّةُ الفصحى", u"غير مشكول", "Taha", ] word1 = u"" for word in word_list: print word.encode('utf8'), '\t', if araby.is_vocalized(word): print ' is vocalized',