def __init__(self, debug=False):
    """Build the stemmers, caches and verb dictionary used by this object.

    Args:
        debug: stored unchanged on ``self.debug``.
    """
    # Stemmer for enclitics and proclitics, fed with the COMP affix lists.
    clitic_stemmer = tashaphyne.stemming.ArabicLightStemmer()
    clitic_stemmer.set_prefix_list(SVC.COMP_PREFIX_LIST)
    clitic_stemmer.set_suffix_list(SVC.COMP_SUFFIX_LIST)
    self.comp_stemmer = clitic_stemmer

    # Stemmer for conjugated verbs, fed with the CONJ affix lists.
    conjugation_stemmer = tashaphyne.stemming.ArabicLightStemmer()
    conjugation_stemmer.set_prefix_list(SVC.CONJ_PREFIX_LIST)
    conjugation_stemmer.set_suffix_list(SVC.CONJ_SUFFIX_LIST)
    self.conj_stemmer = conjugation_stemmer

    # Enable the last mark (Harakat Al-I3rab).
    self.allow_syntax_lastmark = True

    # Affixes compatibility cache, initially empty.
    self.compatibility_cache = {}
    self.debug = debug
    self.cache_verb = {'verb': {}}

    # Dictionary opened on the "verbs" table.
    self.verb_dictionary = arabicdictionary.ArabicDictionary("verbs")
    # Verb stamp pattern taken from the shared constants module.
    self.verb_stamp_pat = SVC.VERB_STAMP_PAT
def __init__(self, debug=False):
    """Build the stemmers, verb cache, verb dictionary and stamp pattern.

    Args:
        debug: stored unchanged on ``self.debug``.
    """
    # Stemmer for enclitics and proclitics, fed with the COMP affix lists.
    clitic_stemmer = tashaphyne.stemming.ArabicLightStemmer()
    clitic_stemmer.set_prefix_list(svconst.COMP_PREFIX_LIST)
    clitic_stemmer.set_suffix_list(svconst.COMP_SUFFIX_LIST)
    self.comp_stemmer = clitic_stemmer

    # Stemmer for conjugated verbs, fed with the CONJ affix lists.
    conjugation_stemmer = tashaphyne.stemming.ArabicLightStemmer()
    conjugation_stemmer.set_prefix_list(svconst.CONJ_PREFIX_LIST)
    conjugation_stemmer.set_suffix_list(svconst.CONJ_SUFFIX_LIST)
    self.conj_stemmer = conjugation_stemmer

    # Enable the last mark (Harakat Al-I3rab).
    self.allow_syntax_lastmark = True

    self.debug = debug
    self.cache_verb = {'verb': {}}

    # Dictionary opened on the "verbs" table.
    self.verb_dictionary = arabicdictionary.ArabicDictionary("verbs")

    # Character class matching any one of the six letters/marks listed below.
    stamp_letters = (
        araby.ALEF,
        araby.YEH,
        araby.WAW,
        araby.ALEF_MAKSURA,
        araby.HAMZA,
        araby.SHADDA,
    )
    stamp_regex = u"[%s%s%s%s%s%s]" % stamp_letters
    self.verb_stamp_pat = re.compile(stamp_regex, re.UNICODE)
def __init__(self, debug=False):
    """Build the stemmers, affix generator, noun dictionaries and caches.

    Args:
        debug: stored unchanged on ``self.debug``.
    """
    # Stemmer for enclitics and proclitics, fed with the COMP affix lists.
    clitic_stemmer = tashaphyne.stemming.ArabicLightStemmer()
    clitic_stemmer.set_prefix_list(SNC.COMP_PREFIX_LIST)
    clitic_stemmer.set_suffix_list(SNC.COMP_SUFFIX_LIST)
    self.comp_stemmer = clitic_stemmer

    # Stemmer for conjugation affixes, fed with the CONJ affix lists.
    conjugation_stemmer = tashaphyne.stemming.ArabicLightStemmer()
    conjugation_stemmer.set_prefix_list(SNC.CONJ_PREFIX_LIST)
    conjugation_stemmer.set_suffix_list(SNC.CONJ_SUFFIX_LIST)
    self.conj_stemmer = conjugation_stemmer

    # Noun affix generator.
    self.generator = alyahmor.noun_affixer.noun_affixer()

    # Enable the last mark (Harakat Al-I3rab).
    self.allow_syntax_lastmark = True

    # Noun dictionary and custom noun dictionary, both opened on "nouns".
    self.noun_dictionary = arabicdictionary.ArabicDictionary("nouns")
    self.custom_noun_dictionary = custom_dictionary.custom_dictionary(
        "nouns")

    # Caches, initially empty.
    self.cache_dict_search = {}
    self.cache_affixes_verification = {}

    # Debug flag (per the original note: allows printing internal results).
    self.debug = debug
    self.error_code = ""
def test_lookup(self):
    """Test lookup: check the number of 'verbs' entries found per word."""
    verbs = arz.ArabicDictionary('verbs')
    # (word, expected number of entries returned by lookup), checked in order.
    expected_counts = (
        (u"استقلّ", 0),
        (u'استقل', 1),
        (u"كذب", 2),
    )
    for word, count in expected_counts:
        self.assertEqual(len(verbs.lookup(word)), count)