def test_word_text(self):
    """Check the word- and text-level predicates exposed by ``Araby``.

    Exercises ``is_vocalized``, ``is_vocalizedtext``, ``is_arabicstring``
    and ``is_arabicword`` with a bare and a diacritized sample each, plus
    a list of strings that ``is_arabicword`` is expected to reject.
    """
    # is_vocalized(word): a bare word, then a word carrying diacritics.
    bare_word = u"العربية"
    self.assertFalse(Araby.is_vocalized(bare_word))
    self.assertTrue(Araby.is_vocalized(u"الْعَرَبِيّةُ"))

    # is_vocalizedtext(text): same check on multi-word text.
    self.assertFalse(Araby.is_vocalizedtext(u"العربية لغة جميلة"))
    self.assertTrue(Araby.is_vocalizedtext(u"الْعَرَبيَّة لُغَةٌ جَمِيلَةٌ"))

    # is_arabicstring TODO: add more examples
    self.assertTrue(Araby.is_arabicstring(bare_word))

    # is_arabicrange TODO: add test

    # is_arabicword TODO: test other cases
    rejected_words = (
        u"",  # empty string
        u"ْلاندخل",  # starts with sukun
        u"ؤكل",  # starts with waw with hamza above
        u"ئكل",  # starts with yeh with hamza above
        u"ةدخل",  # starts with teh marbuta
    )
    for candidate in rejected_words:
        self.assertFalse(Araby.is_arabicword(candidate))

    self.assertTrue(Araby.is_arabicword(bare_word))
 def search_arabic(self, q, fetch_subgraph = True, limit = DEFAULT_LIMIT,
                   fetchplan = DEFAULT_FETCHPLAN):
     """
     Search the unvocalized-label index for q, taking vocalization into
     account.

     An unvocalized q is looked up directly and the result of
     search_index() is returned. A vocalized q is stripped of its
     tashkeel for the lookup; the primary results whose label passes
     Tools.is_vocalized_like(q, label) are then fetched with get_nodes().
     If no result passes, all primary results are fetched instead.

     (As the original note says: without a fetchplan you will get index
     nodes only.)
     """
     index_name = "ArabicNode.unvocalized_label"

     if araby.is_vocalized(q):
         # Look up the bare form first (no subgraph needed for filtering),
         # then keep only the nodes whose vocalization is compatible.
         matches = self.search_index(araby.strip_tashkeel(q), False,
                                     index_name, limit)
         rids = []
         for node in matches.primary_results:
             if Tools.is_vocalized_like(q, node.data["label"]):
                 rids.append(node.rid)
         # Nothing compatible: fall back to every primary result.
         if not rids:
             rids = [node.rid for node in matches.primary_results]
         return self.get_nodes(rids, fetch_subgraph, limit, fetchplan)

     # Unvocalized query: a plain index search is enough.
     return self.search_index(q, fetch_subgraph, index_name, limit,
                              fetchplan)
Exemple #3
0
    def search_arabic(self,
                      q,
                      fetch_subgraph=True,
                      limit=DEFAULT_LIMIT,
                      fetchplan=DEFAULT_FETCHPLAN):
        """
        Vocalization-aware label search on the unvocalized-label index.

        * q without vocalization: returns search_index() for q as is.
        * q with vocalization: searches for q stripped of tashkeel, keeps
          the primary results whose label is accepted by
          Tools.is_vocalized_like(q, label) (or all of them when none is
          accepted) and returns get_nodes() for their rids.

        (Without a fetchplan you will get index nodes only.)
        """
        unvocalized_index = "ArabicNode.unvocalized_label"

        query_is_bare = not araby.is_vocalized(q)
        if query_is_bare:
            return self.search_index(q, fetch_subgraph, unvocalized_index,
                                     limit, fetchplan)

        # The subgraph is not fetched here; this pass only collects rids.
        bare_query = araby.strip_tashkeel(q)
        matches = self.search_index(bare_query, False, unvocalized_index,
                                    limit)
        compatible = [
            node.rid for node in matches.primary_results
            if Tools.is_vocalized_like(q, node.data["label"])
        ]
        if compatible:
            rids = compatible
        else:
            # No compatible vocalization: ignore it and take everything.
            rids = [node.rid for node in matches.primary_results]
        return self.get_nodes(rids, fetch_subgraph, limit, fetchplan)
Exemple #4
0
def check_partial_vocalized(word_vocalised, resulted_data):
    """
    Keep only the analyses whose vocalization is compatible with the input.

    If the input word carries no vocalization, the analyses are returned
    untouched. Otherwise every analysis that has a 'vocalized' entry
    accepted by araby.vocalizedlike() gets the partial-vocalization tag
    appended to its 'tags' (the item is modified in place) and is kept.
    @param word_vocalised: the input word.
    @type word_vocalised: unicode.
    @param resulted_data: the analyses found in the dictionary.
    @type resulted_data: list of dict.
    @return: list of dictionaries of analyzed words with tags.
    @rtype: list.
    """
    # Nothing to compare against when the input has no vocalization.
    if not araby.is_vocalized(word_vocalised):
        return resulted_data

    compatible = []
    for analysis in resulted_data:
        if 'vocalized' not in analysis:
            continue
        if not araby.vocalizedlike(word_vocalised, analysis['vocalized']):
            continue
        analysis['tags'] += ':'+analex_const.partialVocalizedTag
        compatible.append(analysis)
    return compatible
Exemple #5
0
def check_partial_vocalized(word_vocalised, resulted_data):
    """
    Filter dictionary results by the (partial) vocalization of the input.

    A non-vocalized input leaves resulted_data as is. For a vocalized
    input, only items with a 'vocalized' form that araby.vocalizedlike()
    accepts are returned; each of them has the partial-vocalization tag
    appended to its 'tags' value in place.
    @param word_vocalised: the input word.
    @type word_vocalised: unicode.
    @param resulted_data: the results found in the dictionary.
    @type resulted_data: list of dict.
    @return: list of dictionaries of analyzed words with tags.
    @rtype: list.
    """
    if araby.is_vocalized(word_vocalised):
        kept = []
        # Compare each vocalized output with the vocalized input.
        for record in resulted_data:
            has_form = 'vocalized' in record
            if has_form and araby.vocalizedlike(word_vocalised,
                                                record['vocalized']):
                record['tags'] += ':'+analex_const.partialVocalizedTag
                kept.append(record)
        return kept
    return resulted_data
Exemple #6
0
 def check_fields(self, fields):
     """
     Validate the 'vocalized' field of an entry and register it.

     Returns "ok" or the first error message that applies. The word is
     appended to self.index once it is non-empty, a valid Arabic word and
     not yet indexed, i.e. before the vocalization checks run.
     """
     word = fields.get('vocalized', '')
     if not word:
         status = "Error: Empty vocalized"
     elif not ar.is_arabicword(word):
         status = "Error: Invalid Arabic word "
     elif word in self.index:
         # already seen: duplicates are rejected
         status = "Error: Duplicated Entry "
     else:
         self.index.append(word)
         if not ar.is_vocalized(word):
             status = "Error: Not Vocalized"
         elif not verify_tashkeel(word):
             status = "Error: Error in Vocalization "
         else:
             status = "ok"
     return status
Exemple #7
0
 def check_fields(self, fields):
     """
     Validate the 'vocalized' field of a verb entry and register it.

     Returns "ok", a warning or the first error message that applies.
     A duplicate is reported as a warning when the 'unvocalized' field
     has at most 3 characters and as an error otherwise. The word is
     appended to self.index before the vocalization checks run.
     """
     word = fields.get('vocalized', '')
     bare = fields.get('unvocalized', '')

     if not word:
         return "Error: Empty vocalized"
     if not ar.is_arabicword(word):
         return "Error: Invalid Arabic word "
     if not is_valid_infinitive_verb(word):
         return "Error: Invalid Arabic infinitive verb "

     # Duplicates: short unvocalized forms only raise a warning.
     if word in self.index:
         severity = "Warning" if len(bare) <= 3 else "Error"
         return severity + ": Duplicated Entry "
     self.index.append(word)

     # Vocalization must be present and well formed.
     if not ar.is_vocalized(word):
         return "Error: Not Vocalized"
     if verify_tashkeel(word):
         return "ok"
     return "Error: Error in Vocalization "
Exemple #8
0
    def check_partial_vocalized(word_vocalised, resulted_data):
        """
        Keep the analyses whose vocalization matches a vocalized input.

        A non-vocalized input returns resulted_data unchanged. Otherwise
        an item with a 'vocalized' form is kept, and gets the
        partial-vocalization tag appended to its 'tags' in place, when
        either araby.vocalizedlike() accepts the form, or the item is a
        verb, the input ends with kasra where the form ends with sukun,
        and the two are alike once their last character is dropped.
        @param word_vocalised: the input word.
        @type word_vocalised: unicode.
        @param resulted_data: the results found in the dictionary.
        @type resulted_data: list of dict.
        @return: list of dictionaries of analyzed words with tags.
        @rtype: list.
        """
        if not araby.is_vocalized(word_vocalised):
            return resulted_data

        kept = []

        def keep(entry):
            # Tag the entry in place and retain it in the result.
            entry['tags'] += ':' + analex_const.PARTIAL_VOCALIZED_TAG
            kept.append(entry)

        for entry in resulted_data:
            if 'vocalized' not in entry:
                continue
            candidate = entry['vocalized']
            is_verb = "Verb" in entry['type']
            if araby.vocalizedlike(word_vocalised, candidate):
                keep(entry)
                continue
            # Meeting of two sukuns in already-vocalized text: the verb
            # ends with a kasra in the input instead of the sukun.
            kasra_replaces_sukun = (is_verb
                                    and word_vocalised.endswith(araby.KASRA)
                                    and candidate.endswith(araby.SUKUN))
            if kasra_replaces_sukun and araby.vocalizedlike(
                    word_vocalised[:-1], candidate[:-1]):
                keep(entry)

        return kept
Exemple #9
0
    def contextMenuEvent(self, event):
        """
        Show the standard context menu extended with word suggestions.

        The word under the cursor is selected. If self.dict.check() accepts
        it, the first ten results of self.dict.suggest() are added to the
        popup menu and any further ones to a sub-menu; when the only
        suggestion is not vocalized, an "add to dictionary" action and the
        custom-dictionary entries are offered as well. If the word is not
        accepted, only the custom-dictionary entries are offered.

        The trailing character of the selection is stored in self.pretxt
        when it is one of the listed punctuation marks ('' otherwise).
        """
        popup_menu = self.createStandardContextMenu()
        # NOTE(review): presumably mirrors Qt.RightToLeft - confirm.
        RightToLeft = 1
        # Select the word under the cursor.
        cursor = self.textCursor()
        cursor.select(QTextCursor.WordUnderCursor)
        self.setTextCursor(cursor)

        if self.textCursor().hasSelection():
            # Workaround for a Qt bug: double click selects Arabic
            # punctuation marks together with the word, see
            # https://bugreports.qt-project.org/browse/QTBUG-42397
            originaltext = unicode(self.textCursor().selectedText())

            arabicmarks = [u'؟',u'،',u'؛',u'“',u'”',u'‘',u'’']
            holder = originaltext[-1]
            if holder in arabicmarks:
                self.pretxt = holder
            else:
                self.pretxt=''
            text = originaltext.strip(u'؟،؛“”‘’')

            # Created before branching: both branches below use it as the
            # parent of their actions. Creating it only in the first branch
            # made the second one fail with UnboundLocalError.
            spell_menu = QMenu(u'المزيد...')
            spell_menu.setLayoutDirection(RightToLeft)

            # the word is already analyzed
            if self.dict.check(text):
                suggests = self.dict.suggest(text)
                for word in suggests[:10]:
                    action = SpellAction(word, spell_menu)
                    action.correct.connect(self.correctWord)
                    popup_menu.addAction(action)

                popup_menu.setStyleSheet("QMenu {font: 24px;}")

                # Overflow suggestions go to the sub-menu.
                if len (suggests)>10:
                    for word in suggests[10:]:
                        action = SpellAction(word, spell_menu)
                        action.correct.connect(self.correctWord)
                        spell_menu.addAction(action)
                    spell_menu.setStyleSheet("QMenu {font: 24px;}")

                    popup_menu.addSeparator()
                    popup_menu.addMenu(spell_menu)

                if len(suggests) == 1 and not araby.is_vocalized(suggests[0]):
                    addtodict_action = popup_menu.addAction(u'أضف للقاموس')
                    addtodict_action.triggered.connect( lambda : self.add_to_dict(originaltext))
                    # The word has no vocalized suggestion: look up a
                    # customized vocalization. suggests[0] is the value the
                    # leftover loop variable held here (single suggestion).
                    suggests = self.dict.custom_dict.lookup(suggests[0])
                    for word in suggests:
                        action = SpellAction(word, spell_menu)
                        action.correct.connect(self.correctWord)
                        popup_menu.addAction(action)

            else:
                # The word is not known: look up a customized vocalization.
                suggests = self.dict.custom_dict.lookup(text)
                for word in suggests:
                    action = SpellAction(word, spell_menu)
                    action.correct.connect(self.correctWord)
                    popup_menu.addAction(action)
        popup_menu.exec_(event.globalPos())
    if araby.is_moon(c): print('moon', end=" ")
    if araby.is_sun(c): print('sun', end=" ")
    print(araby.order(c), end=" ")
    print()
# Sample inputs for the araby predicates and normalizers exercised below.
word = u"الْعَرَيِيّةُ"
word_list = [
    u"الْعَرَيِيّةُ",
    u"العربية",
    u"الْعَرَيِيّةُ الفصحى",
    u"غير مشكول",
    "Taha",
]
# Previous word of the loop (empty on the first pass); compared with the
# current word through vocalizedlike().
word1 = u""
for word in word_list:
    # Every print below ends with a space instead of a newline, so each
    # input produces a single output line closed by the bare print().
    print(word, '\t', end=" ")
    if araby.is_vocalized(word): print(' is vocalized', end=" ")
    if araby.is_vocalizedtext(word): print(' is vocalized text', end=" ")
    if araby.is_arabicword(word): print(' is valid word', end=" ")
    else: print("invalid arabic word", end=" ")
    print(' strip harakat', araby.strip_harakat(word), end=" ")
    print(' strip tashkeel', araby.strip_tashkeel(word), end=" ")
    print(' strip tatweel', araby.strip_tatweel(word), end=" ")
    print(' normalize ligature ', araby.normalize_ligature(word), end=" ")
    if araby.vocalizedlike(word, word1): print("vocalized_like", end=" ")
    print()
    word1 = word
# One extra comparison: a bare word against a partially vocalized one.
if araby.vocalizedlike(u"العربية", u"العرَبية"):
    print("vocalized_like", end=" ")
word = u"الْعَرَيِيّةُ"
word_list = [
    u"الْعَرَيِيّةُ",
    if araby.is_weak(c): print ('weak'),
    if araby.is_moon(c): print ('moon'),
    if araby.is_sun(c):print ('sun'),
    print (araby.order(c)),
    print ();
# Sample inputs for the araby predicates and normalizers exercised below.
word=u"الْعَرَيِيّةُ"
word_list=[
u"الْعَرَيِيّةُ",
u"العربية",
u"الْعَرَيِيّةُ الفصحى",
u"غير مشكول",
"Taha",
u"سئل لأنه يؤم الإمام"
]
# Previous word of the loop (empty on the first pass); compared with the
# current word through vocalizedlike().
word1=u""
for word in word_list:
    # Each result is printed on its own line.
    print (word)
    if araby.is_vocalized(word): print (' is vocalized')
    if araby.is_vocalizedtext(word): print (' is vocalized text')
    if araby.is_arabicword(word): print (' is valid word')
    else: print ("invalid arabic word")
    print (' strip harakat', araby.strip_harakat(word))
    print (' strip tashkeel', araby.strip_tashkeel(word))
    print (' strip tatweel',araby.strip_tatweel(word))
    print (' normalize ligature ', araby.normalize_ligature(word))
    print (' normalize hamza', araby.normalize_hamza(word))
    if araby.vocalizedlike(word, word1): print ("vocalized_like")
    word1=word;
# One extra comparison: a bare word against a partially vocalized one.
if araby.vocalizedlike(u"العربية",u"العرَبية"): print ("vocalized_like")

Exemple #12
0
    if araby.is_moon(c): print 'moon',
    if araby.is_sun(c):print 'sun',
    print araby.order(c),
    print;
# Sample inputs for the araby predicates and normalizers exercised below.
# (Python 2 code: print is a statement here.)
word=u"الْعَرَيِيّةُ"
word_list=[
u"الْعَرَيِيّةُ",
u"العربية",
u"الْعَرَيِيّةُ الفصحى",
u"غير مشكول",
"Taha",
]
# Previous word of the loop (empty on the first pass); compared with the
# current word through vocalizedlike().
word1=u""
for word in word_list:
    # The trailing comma of each print statement suppresses the newline,
    # so one input produces one output line closed by the bare print.
    # Unicode values are encoded to UTF-8 before being printed.
    print word.encode('utf8'),'\t',
    if araby.is_vocalized(word): print ' is vocalized',
##    if araby.isArabicstring(word): print ' iisArabicstring',
##    else:print ' invalid arabicstring',
    if araby.is_vocalizedtext(word): print ' is vocalized text',
    if araby.is_arabicword(word): print ' is valid word',
    else: print "invalid arabic word",
    print ' strip harakat', araby.strip_harakat(word).encode('utf8'),
    print ' strip tashkeel', araby.strip_tashkeel(word).encode('utf8'),
    print ' strip tatweel',araby.strip_tatweel(word).encode('utf8'),
    print ' normalize ligature ', araby.normalize_ligature(word).encode('utf8'),
    if araby.vocalizedlike(word, word1): print "vocalized_like",
    print;
    word1=word;
# One extra comparison: a bare word against a partially vocalized one.
if araby.vocalizedlike(u"العربية",u"العرَبية"): print "vocalized_like",
word=u"الْعَرَيِيّةُ"
word_list=[
Exemple #13
0
def decoupage(word):
    """Split *word* into candidate (prefix, base, suffix) combinations.

    Every stored prefix/suffix whose unvoweled form starts/ends the
    unvocalized word is a candidate; when the input is vocalized, the
    voweled form must also be accepted by ``araby.vocalizedlike``. The
    empty prefix and the empty suffix are always candidates.

    A prefix/suffix pair (both non-empty) is rejected when the remaining
    base has 2 letters or fewer or more than 9, or when the classes of the
    two affixes are incompatible. The base keeps the diacritics of the
    input.

    Returns a list of dicts with the keys 'Base', 'Préfixe' and 'Suffixe'
    (the affix is ``""`` when absent). The base may be empty when the word
    consists of an affix only.
    """
    word_unvocalized = araby.strip_diacritics(word)
    # Loop-invariant: computed once instead of once per matching affix.
    input_is_vocalized = araby.is_vocalized(word)
    prefixes, suffixes = [""], [""]
    combinaisons_possibles = []
    for p in Prefixe.objects.all():
        if not word_unvocalized.startswith(p.unvoweled_form):
            continue
        if not input_is_vocalized or araby.vocalizedlike(
                word[:len(p.voweled_form)], p.voweled_form):
            prefixes.append(p)
    for s in Suffixe.objects.all():
        if not word_unvocalized.endswith(s.unvoweled_form):
            continue
        if not input_is_vocalized or araby.vocalizedlike(
                word[-len(s.voweled_form):], s.voweled_form):
            suffixes.append(s)

    for pr in prefixes:
        for sf in suffixes:
            # Validation criteria (only applied when both affixes exist).
            if pr != "" and sf != "":
                base_length = (len(word_unvocalized)
                               - len(pr.unvoweled_form)
                               - len(sf.unvoweled_form))
                if base_length <= 2 or base_length > 9:
                    continue
                # NOTE(review): 'N'/'V' presumably stand for nominal and
                # verbal affix classes - confirm against the models.
                if ((pr.classe[0] == 'N' and sf.classe[0] == 'V')
                        or (pr.classe[0] == 'V' and sf.classe[0] == 'N')
                        or (pr.classe in ['N1', 'N2', 'N3', 'N5'])):
                    continue
            # Reaching this point: the prefix is compatible with the
            # suffix and the size of the base is acceptable.
            base = word
            # Remove the prefix from the base, keeping the diacritics.
            if pr:
                for char in pr.unvoweled_form:
                    # Skip anything (diacritics) before the next prefix
                    # letter, then drop the letter itself.
                    while base and char != base[0]:
                        base = base[1:]
                    base = base[1:]
                # Drop the diacritics that belonged to the last prefix
                # letter. The emptiness guard matters when the word is
                # nothing but the prefix: indexing base[0] would raise
                # IndexError otherwise.
                while base and araby.is_tashkeel(base[0]):
                    base = base[1:]

            # Remove the suffix from the base, keeping the diacritics.
            if sf:
                for char in reversed(sf.unvoweled_form):
                    base = base[:base.rindex(char)]

            combinaisons_possibles.append({
                'Base': base,
                'Préfixe': pr,
                'Suffixe': sf
            })

    return combinaisons_possibles
Exemple #14
0
    if araby.is_moon(c): print 'moon',
    if araby.is_sun(c): print 'sun',
    print araby.order(c),
    print
# Sample inputs for the araby predicates and normalizers exercised below.
# (Python 2 code: print is a statement here.)
word = u"الْعَرَيِيّةُ"
word_list = [
    u"الْعَرَيِيّةُ",
    u"العربية",
    u"الْعَرَيِيّةُ الفصحى",
    u"غير مشكول",
    "Taha",
]
# Previous word of the loop (empty on the first pass); compared with the
# current word through vocalizedlike().
word1 = u""
for word in word_list:
    # The trailing comma of each print statement suppresses the newline,
    # so one input produces one output line closed by the bare print.
    # Unicode values are encoded to UTF-8 before being printed.
    print word.encode('utf8'), '\t',
    if araby.is_vocalized(word): print ' is vocalized',
    ##    if araby.isArabicstring(word): print ' iisArabicstring',
    ##    else:print ' invalid arabicstring',
    if araby.is_vocalizedtext(word): print ' is vocalized text',
    if araby.is_arabicword(word): print ' is valid word',
    else: print "invalid arabic word",
    print ' strip harakat', araby.strip_harakat(word).encode('utf8'),
    print ' strip tashkeel', araby.strip_tashkeel(word).encode('utf8'),
    print ' strip tatweel', araby.strip_tatweel(word).encode('utf8'),
    print ' normalize ligature ', araby.normalize_ligature(word).encode(
        'utf8'),
    if araby.vocalizedlike(word, word1): print "vocalized_like",
    print
    word1 = word
# One extra comparison: a bare word against a partially vocalized one.
if araby.vocalizedlike(u"العربية", u"العرَبية"): print "vocalized_like",
word = u"الْعَرَيِيّةُ"
Exemple #15
0
    def contextMenuEvent(self, event):
        """
        Pop up the standard context menu enriched with word suggestions.

        The word under the cursor is selected first. When self.dict.check()
        accepts it, up to ten results of self.dict.suggest() go directly
        into the popup menu and the remaining ones into a sub-menu; if the
        single suggestion is not vocalized, an "add to dictionary" action
        and the custom-dictionary entries are added too. When the word is
        not accepted, only the custom-dictionary entries are added.

        self.pretxt receives the last selected character when it is one of
        the listed punctuation marks, '' otherwise.
        """
        popup_menu = self.createStandardContextMenu()
        RightToLeft = 1

        # Select the word under the cursor.
        cursor = self.textCursor()
        cursor.select(QTextCursor.WordUnderCursor)
        self.setTextCursor(cursor)

        # Nothing selected: show the plain standard menu.
        if not self.textCursor().hasSelection():
            popup_menu.exec_(event.globalPos())
            return

        # Workaround for a Qt bug: double click selects Arabic punctuation
        # marks together with the word, see
        # https://bugreports.qt-project.org/browse/QTBUG-42397
        originaltext = self.textCursor().selectedText()

        arabicmarks = [u'؟', u'،', u'؛', u'“', u'”', u'‘', u'’']
        last_char = originaltext[-1]
        self.pretxt = last_char if last_char in arabicmarks else ''
        text = originaltext.strip(u'؟،؛“”‘’')

        spell_menu = QMenu(u'المزيد...')
        spell_menu.setLayoutDirection(RightToLeft)

        def add_actions(words, target_menu):
            # One SpellAction per word, parented to the sub-menu and wired
            # to correctWord, appended to the given menu.
            for candidate in words:
                action = SpellAction(candidate, spell_menu)
                action.correct.connect(self.correctWord)
                target_menu.addAction(action)

        if not self.dict.check(text):
            # Unknown word: offer the customized vocalizations only.
            add_actions(self.dict.custom_dict.lookup(text), popup_menu)
        else:
            # The word is already analyzed.
            suggests = self.dict.suggest(text)
            add_actions(suggests[:10], popup_menu)
            popup_menu.setStyleSheet("QMenu {font: 24px;}")

            # Overflow suggestions go to the sub-menu.
            if len(suggests) > 10:
                add_actions(suggests[10:], spell_menu)
                spell_menu.setStyleSheet("QMenu {font: 24px;}")
                popup_menu.addSeparator()
                popup_menu.addMenu(spell_menu)

            if len(suggests) == 1 and not araby.is_vocalized(suggests[0]):
                addtodict_action = popup_menu.addAction(u'أضف للقاموس')
                addtodict_action.triggered.connect(
                    lambda: self.add_to_dict(originaltext))
                # No vocalized suggestion: look up a customized
                # vocalization for the single suggestion.
                add_actions(self.dict.custom_dict.lookup(suggests[0]),
                            popup_menu)

        popup_menu.exec_(event.globalPos())