def __init__(self, debug=False):
    """Set up the affix stemmers, the noun dictionary and the lookup caches.

    Two ``tashaphyne`` ``ArabicLightStemmer`` objects are created and
    configured from the ``COMP_*`` and ``CONJ_*`` constants of
    ``stem_noun_const`` respectively.

    Args:
        debug: Stored unchanged as ``self.debug``; intended to allow
            printing of internal results.
    """
    const = stem_noun_const

    # Stemmer for enclitics and proclitics, driven by the COMP_* settings.
    self.compStemmer = comp = tashaphyne.stemming.ArabicLightStemmer()
    comp.set_infix_letters(const.COMP_INFIX_LETTERS)
    comp.set_prefix_letters(const.COMP_PREFIX_LETTERS)
    comp.set_suffix_letters(const.COMP_SUFFIX_LETTERS)
    comp.set_max_prefix_length(const.COMP_MAX_PREFIX)
    comp.set_max_suffix_length(const.COMP_MAX_SUFFIX)
    comp.set_min_stem_length(const.COMP_MIN_STEM)
    comp.set_prefix_list(const.COMP_PREFIX_LIST)
    comp.set_suffix_list(const.COMP_SUFFIX_LIST)

    # Stemmer for conjugation affixes, driven by the CONJ_* settings.
    self.conjStemmer = conj = tashaphyne.stemming.ArabicLightStemmer()
    conj.set_infix_letters(const.CONJ_INFIX_LETTERS)
    conj.set_prefix_letters(const.CONJ_PREFIX_LETTERS)
    conj.set_suffix_letters(const.CONJ_SUFFIX_LETTERS)
    conj.set_max_prefix_length(const.CONJ_MAX_PREFIX)
    conj.set_max_suffix_length(const.CONJ_MAX_SUFFIX)
    conj.set_min_stem_length(const.CONJ_MIN_STEM)
    conj.set_prefix_list(const.CONJ_PREFIX_LIST)
    conj.set_suffix_list(const.CONJ_SUFFIX_LIST)

    # Enable the last mark (Harakat Al-I3rab).
    self.allowSyntaxLastMark = True

    # Noun dictionary.
    # NOTE: an earlier variant also passed NOUN_DICTIONARY_INDEX as a second
    # argument; only the table name is passed here.
    self.nounDictionary = arabicdictionary.arabicDictionary("nouns")

    # Caches, created empty.
    self.CacheDictSearch = {}
    self.CacheAffixesVerification = {}

    # Allow printing of internal results.
    self.debug = debug
def __init__(self, debug=False):
    """Build the stemmers, dictionaries and index tables for this object.

    Creates two ``tashaphyne`` ``ArabicLightStemmer`` instances configured
    from the ``COMP_*`` and ``CONJ_*`` constants of ``stem_noun_const``,
    opens the noun and word-frequency dictionaries, and initialises two
    empty lookup tables.

    Args:
        debug: Stored unchanged as ``self.debug``; intended to allow
            printing of internal results.
    """
    settings = stem_noun_const
    new_stemmer = tashaphyne.stemming.ArabicLightStemmer

    # Stemmer for enclitics and proclitics (COMP_* settings).
    self.compStemmer = clitics = new_stemmer()
    clitics.set_infix_letters(settings.COMP_INFIX_LETTERS)
    clitics.set_prefix_letters(settings.COMP_PREFIX_LETTERS)
    clitics.set_suffix_letters(settings.COMP_SUFFIX_LETTERS)
    clitics.set_max_prefix_length(settings.COMP_MAX_PREFIX)
    clitics.set_max_suffix_length(settings.COMP_MAX_SUFFIX)
    clitics.set_min_stem_length(settings.COMP_MIN_STEM)
    clitics.set_prefix_list(settings.COMP_PREFIX_LIST)
    clitics.set_suffix_list(settings.COMP_SUFFIX_LIST)

    # Stemmer for conjugation affixes (CONJ_* settings).
    self.conjStemmer = affixes = new_stemmer()
    affixes.set_infix_letters(settings.CONJ_INFIX_LETTERS)
    affixes.set_prefix_letters(settings.CONJ_PREFIX_LETTERS)
    affixes.set_suffix_letters(settings.CONJ_SUFFIX_LETTERS)
    affixes.set_max_prefix_length(settings.CONJ_MAX_PREFIX)
    affixes.set_max_suffix_length(settings.CONJ_MAX_SUFFIX)
    affixes.set_min_stem_length(settings.CONJ_MIN_STEM)
    affixes.set_prefix_list(settings.CONJ_PREFIX_LIST)
    affixes.set_suffix_list(settings.CONJ_SUFFIX_LIST)

    # Noun dictionary.
    self.nounDictionary = arabicdictionary.arabicDictionary(
        "nouns", NOUN_DICTIONARY_INDEX)

    # Word frequency dictionary.
    self.wordfreq = wordfreqdictionaryclass.wordfreqDictionary(
        'wordfreq', wordfreqdictionaryclass.wordfreq_DICTIONARY_INDEX)

    # Lookup tables, created empty.
    self.Table_affix_INDEX = {}
    self.NOUN_DICTIONARY_STAMP = {}

    # Allow printing of internal results.
    self.debug = debug
def __init__(self, debug=False):
    """Set up the affix stemmers, verb dictionary, cache and stamp pattern.

    Two ``tashaphyne`` ``ArabicLightStemmer`` objects are created and
    configured from the ``COMP_*`` and ``CONJ_*`` constants of
    ``stem_verb_const`` respectively.

    Args:
        debug: Stored unchanged as ``self.debug``.
    """
    # Stemmer for enclitics and proclitics.
    self.compStemmer = tashaphyne.stemming.ArabicLightStemmer()
    self.compStemmer.set_infix_letters(stem_verb_const.COMP_INFIX_LETTERS)
    self.compStemmer.set_prefix_letters(stem_verb_const.COMP_PREFIX_LETTERS)
    self.compStemmer.set_suffix_letters(stem_verb_const.COMP_SUFFIX_LETTERS)
    self.compStemmer.set_max_prefix_length(stem_verb_const.COMP_MAX_PREFIX)
    self.compStemmer.set_max_suffix_length(stem_verb_const.COMP_MAX_SUFFIX)
    self.compStemmer.set_min_stem_length(stem_verb_const.COMP_MIN_STEM)
    self.compStemmer.set_prefix_list(stem_verb_const.COMP_PREFIX_LIST)
    self.compStemmer.set_suffix_list(stem_verb_const.COMP_SUFFIX_LIST)

    # Stemmer for conjugated verbs.
    self.conjStemmer = tashaphyne.stemming.ArabicLightStemmer()
    self.conjStemmer.set_infix_letters(stem_verb_const.CONJ_INFIX_LETTERS)
    self.conjStemmer.set_prefix_letters(stem_verb_const.CONJ_PREFIX_LETTERS)
    self.conjStemmer.set_suffix_letters(stem_verb_const.CONJ_SUFFIX_LETTERS)
    self.conjStemmer.set_max_prefix_length(stem_verb_const.CONJ_MAX_PREFIX)
    self.conjStemmer.set_max_suffix_length(stem_verb_const.CONJ_MAX_SUFFIX)
    self.conjStemmer.set_min_stem_length(stem_verb_const.CONJ_MIN_STEM)
    self.conjStemmer.set_prefix_list(stem_verb_const.CONJ_PREFIX_LIST)
    self.conjStemmer.set_suffix_list(stem_verb_const.CONJ_SUFFIX_LIST)

    # Enable the last mark (Harakat Al-I3rab).
    self.allowSyntaxLastMark = True

    # The former local ``statistics`` counter dict was never read or stored
    # on the instance, so it has been removed as dead code.

    self.debug = debug

    # Verb cache, starting with an empty 'verb' section.
    self.cacheVerb = {'verb': {}}

    self.verbDictionary = arabicdictionary.arabicDictionary("verbs")

    # Character-class pattern matching any one of ALEF, YEH, WAW,
    # ALEF MAKSURA or SHADDA, as defined by the ``araby`` constants.
    self.VerbSTAMP_pat = re.compile(
        u"[%s%s%s%s%s]" % (
            araby.ALEF,
            araby.YEH,
            araby.WAW,
            araby.ALEF_MAKSURA,
            araby.SHADDA,
        ),
        re.UNICODE,
    )
def __init__(self, debug=False):
    """Prepare stemmers, dictionaries and empty index tables.

    The constructor:

    * creates ``self.compStemmer`` and ``self.conjStemmer`` (both
      ``tashaphyne`` ``ArabicLightStemmer`` objects) and configures them
      from the ``COMP_*`` / ``CONJ_*`` constants of ``stem_noun_const``;
    * opens the noun dictionary and the word-frequency dictionary;
    * initialises ``Table_affix_INDEX`` and ``NOUN_DICTIONARY_STAMP`` as
      empty dicts.

    Args:
        debug: Stored unchanged as ``self.debug``; intended to allow
            printing of internal results.
    """
    cfg = stem_noun_const

    # Enclitic / proclitic stemmer.
    self.compStemmer = comp_stemmer = tashaphyne.stemming.ArabicLightStemmer()
    comp_stemmer.set_infix_letters(cfg.COMP_INFIX_LETTERS)
    comp_stemmer.set_prefix_letters(cfg.COMP_PREFIX_LETTERS)
    comp_stemmer.set_suffix_letters(cfg.COMP_SUFFIX_LETTERS)
    comp_stemmer.set_max_prefix_length(cfg.COMP_MAX_PREFIX)
    comp_stemmer.set_max_suffix_length(cfg.COMP_MAX_SUFFIX)
    comp_stemmer.set_min_stem_length(cfg.COMP_MIN_STEM)
    comp_stemmer.set_prefix_list(cfg.COMP_PREFIX_LIST)
    comp_stemmer.set_suffix_list(cfg.COMP_SUFFIX_LIST)

    # Conjugation-affix stemmer.
    self.conjStemmer = conj_stemmer = tashaphyne.stemming.ArabicLightStemmer()
    conj_stemmer.set_infix_letters(cfg.CONJ_INFIX_LETTERS)
    conj_stemmer.set_prefix_letters(cfg.CONJ_PREFIX_LETTERS)
    conj_stemmer.set_suffix_letters(cfg.CONJ_SUFFIX_LETTERS)
    conj_stemmer.set_max_prefix_length(cfg.CONJ_MAX_PREFIX)
    conj_stemmer.set_max_suffix_length(cfg.CONJ_MAX_SUFFIX)
    conj_stemmer.set_min_stem_length(cfg.CONJ_MIN_STEM)
    conj_stemmer.set_prefix_list(cfg.CONJ_PREFIX_LIST)
    conj_stemmer.set_suffix_list(cfg.CONJ_SUFFIX_LIST)

    # Noun dictionary.
    noun_table = "nouns"
    self.nounDictionary = arabicdictionary.arabicDictionary(
        noun_table, NOUN_DICTIONARY_INDEX)

    # Word frequency dictionary.
    freq_index = wordfreqdictionaryclass.wordfreq_DICTIONARY_INDEX
    self.wordfreq = wordfreqdictionaryclass.wordfreqDictionary(
        'wordfreq', freq_index)

    # Index tables, created empty.
    self.Table_affix_INDEX = {}
    self.NOUN_DICTIONARY_STAMP = {}

    # Allow printing of internal results.
    self.debug = debug