Ejemplo n.º 1
0
#
# Author:      Taha Zerrouki (taha.zerrouki[at]gmail.com)
#
# Created:     18-05-2014
# Copyright:   (c) Taha Zerrouki 2014
# Licence:     GPL
#-------------------------------------------------------------------------------
"""
wordCase represents the data resulting from the morphological analysis
"""
# When this file is executed directly as a script, append the parent
# directory to the module search path before the imports below run.
if __name__ == "__main__":
    import sys
    sys.path.append('..')
#~import pyarabic.araby as araby
import pyarabic.arabrepr as arabrepr
# Module-level ArabicRepr instance, created once when the module is loaded.
arabicRepr = arabrepr.ArabicRepr()

#~import analex_const


class WordCase:
    """
    wordCase represents the data resulting from the morphological analysis
    """

    def __init__(self, result_dict=None):
        self.word = u"",
        #~"""input word"""
        self.vocalized = u"",
        #~"""vocalized form of the input word """
        self.semivocalized = u"",
Ejemplo n.º 2
0
    def accepted(self, word):
        """
        Tell whether a word is accepted (considered correct).

        The word is handed to self.analyzer.check_word; an empty result
        rejects it, a result holding more than one case accepts it, and a
        single case accepts it only when that case is not flagged unknown.
        @param word: input text.
        @type word: unicode.
        @return: True if word is accepted.
        @rtype: boolean.
        """
        cases = self.analyzer.check_word(word)
        # nothing came back from the analyzer: the word is rejected
        if not cases:
            return False
        # several cases were found: the word is accepted as is
        if len(cases) > 1:
            return True
        # exactly one case: accept it unless it is marked as unknown
        only_case = cases[0]
        return not only_case.is_unknown()


if __name__ == "__main__":
    print "test"
    myrepr = arabRepr.ArabicRepr()
    speller = SpellcheckClass()
    text = u" اللغه العربيه"
    voc = speller.spellcheck(text, True)
    # print myrepr.repr(voc).encode('utf8')
    for itemd in voc:
        if itemd.get('suggest', '') != '':
            for sug in itemd.get('suggest', '').split(';'):
                print sug.encode('utf8'), '\t', araby.is_arabicword(sug)