예제 #1
0
	def __init__(self, debug=False):
		"""Create the affix stemmers, the noun dictionary and the lookup caches.

		Two ``ArabicLightStemmer`` instances are built and configured from
		``stem_noun_const``: one from the ``COMP_*`` constants and one from
		the ``CONJ_*`` constants.

		Args:
			debug: stored unchanged on ``self.debug``.
		"""
		# First stemmer: enclitics and proclitics (COMP_* settings).
		comp = tashaphyne.stemming.ArabicLightStemmer()
		self.compStemmer = comp
		consts = stem_noun_const
		comp.set_infix_letters(consts.COMP_INFIX_LETTERS)
		comp.set_prefix_letters(consts.COMP_PREFIX_LETTERS)
		comp.set_suffix_letters(consts.COMP_SUFFIX_LETTERS)
		comp.set_max_prefix_length(consts.COMP_MAX_PREFIX)
		comp.set_max_suffix_length(consts.COMP_MAX_SUFFIX)
		comp.set_min_stem_length(consts.COMP_MIN_STEM)
		comp.set_prefix_list(consts.COMP_PREFIX_LIST)
		comp.set_suffix_list(consts.COMP_SUFFIX_LIST)

		# Second stemmer, configured from the CONJ_* constants
		# (presumably the conjugation-level affixes -- TODO confirm).
		conj = tashaphyne.stemming.ArabicLightStemmer()
		self.conjStemmer = conj
		conj.set_infix_letters(consts.CONJ_INFIX_LETTERS)
		conj.set_prefix_letters(consts.CONJ_PREFIX_LETTERS)
		conj.set_suffix_letters(consts.CONJ_SUFFIX_LETTERS)
		conj.set_max_prefix_length(consts.CONJ_MAX_PREFIX)
		conj.set_max_suffix_length(consts.CONJ_MAX_SUFFIX)
		conj.set_min_stem_length(consts.CONJ_MIN_STEM)
		conj.set_prefix_list(consts.CONJ_PREFIX_LIST)
		conj.set_suffix_list(consts.CONJ_SUFFIX_LIST)

		# The last mark (Harakat Al-I3rab) is enabled by default.
		self.allowSyntaxLastMark = True

		# Noun dictionary, opened by name only (no explicit index argument).
		self.nounDictionary = arabicdictionary.arabicDictionary("nouns")

		# Per-instance caches, both initially empty.
		self.CacheDictSearch = {}
		self.CacheAffixesVerification = {}

		# Debug flag, kept as given by the caller.
		self.debug = debug
예제 #2
0
	def __init__(self, debug=False):
		"""Initialise the noun stemmer's helper objects.

		Builds one ``ArabicLightStemmer`` from the ``COMP_*`` constants of
		``stem_noun_const`` and a second one from its ``CONJ_*`` constants,
		then opens the "nouns" dictionary and creates two empty caches.

		Args:
			debug: value stored on ``self.debug``.
		"""
		# Stemmer for enclitics and proclitics.
		clitic_stemmer = tashaphyne.stemming.ArabicLightStemmer()
		self.compStemmer = clitic_stemmer
		settings = stem_noun_const
		clitic_stemmer.set_infix_letters(settings.COMP_INFIX_LETTERS)
		clitic_stemmer.set_prefix_letters(settings.COMP_PREFIX_LETTERS)
		clitic_stemmer.set_suffix_letters(settings.COMP_SUFFIX_LETTERS)
		clitic_stemmer.set_max_prefix_length(settings.COMP_MAX_PREFIX)
		clitic_stemmer.set_max_suffix_length(settings.COMP_MAX_SUFFIX)
		clitic_stemmer.set_min_stem_length(settings.COMP_MIN_STEM)
		clitic_stemmer.set_prefix_list(settings.COMP_PREFIX_LIST)
		clitic_stemmer.set_suffix_list(settings.COMP_SUFFIX_LIST)

		# Stemmer driven by the CONJ_* constants
		# (presumably conjugation-level affixes -- TODO confirm).
		affix_stemmer = tashaphyne.stemming.ArabicLightStemmer()
		self.conjStemmer = affix_stemmer
		affix_stemmer.set_infix_letters(settings.CONJ_INFIX_LETTERS)
		affix_stemmer.set_prefix_letters(settings.CONJ_PREFIX_LETTERS)
		affix_stemmer.set_suffix_letters(settings.CONJ_SUFFIX_LETTERS)
		affix_stemmer.set_max_prefix_length(settings.CONJ_MAX_PREFIX)
		affix_stemmer.set_max_suffix_length(settings.CONJ_MAX_SUFFIX)
		affix_stemmer.set_min_stem_length(settings.CONJ_MIN_STEM)
		affix_stemmer.set_prefix_list(settings.CONJ_PREFIX_LIST)
		affix_stemmer.set_suffix_list(settings.CONJ_SUFFIX_LIST)

		# Enable the last mark (Harakat Al-I3rab).
		self.allowSyntaxLastMark = True

		# Noun dictionary; only the dictionary name is passed.
		self.nounDictionary = arabicdictionary.arabicDictionary("nouns")

		# Empty caches, filled elsewhere in the class.
		self.CacheDictSearch = {}
		self.CacheAffixesVerification = {}

		# Debug flag supplied by the caller.
		self.debug = debug
예제 #3
0
	def __init__(self, debug=False):
		"""Create the affix stemmers, dictionaries and empty index tables.

		Two ``ArabicLightStemmer`` instances are configured from the
		``COMP_*`` and ``CONJ_*`` constants of ``stem_noun_const``.  The noun
		dictionary and the word-frequency dictionary are then opened with
		their respective index objects.

		Args:
			debug: stored unchanged on ``self.debug``.
		"""
		# Stemmer for enclitics and proclitics (COMP_* settings).
		comp = tashaphyne.stemming.ArabicLightStemmer()
		self.compStemmer = comp
		consts = stem_noun_const
		comp.set_infix_letters(consts.COMP_INFIX_LETTERS)
		comp.set_prefix_letters(consts.COMP_PREFIX_LETTERS)
		comp.set_suffix_letters(consts.COMP_SUFFIX_LETTERS)
		comp.set_max_prefix_length(consts.COMP_MAX_PREFIX)
		comp.set_max_suffix_length(consts.COMP_MAX_SUFFIX)
		comp.set_min_stem_length(consts.COMP_MIN_STEM)
		comp.set_prefix_list(consts.COMP_PREFIX_LIST)
		comp.set_suffix_list(consts.COMP_SUFFIX_LIST)

		# Stemmer configured from the CONJ_* constants
		# (presumably conjugation-level affixes -- TODO confirm).
		conj = tashaphyne.stemming.ArabicLightStemmer()
		self.conjStemmer = conj
		conj.set_infix_letters(consts.CONJ_INFIX_LETTERS)
		conj.set_prefix_letters(consts.CONJ_PREFIX_LETTERS)
		conj.set_suffix_letters(consts.CONJ_SUFFIX_LETTERS)
		conj.set_max_prefix_length(consts.CONJ_MAX_PREFIX)
		conj.set_max_suffix_length(consts.CONJ_MAX_SUFFIX)
		conj.set_min_stem_length(consts.CONJ_MIN_STEM)
		conj.set_prefix_list(consts.CONJ_PREFIX_LIST)
		conj.set_suffix_list(consts.CONJ_SUFFIX_LIST)

		# Noun dictionary, opened with NOUN_DICTIONARY_INDEX.
		self.nounDictionary = arabicdictionary.arabicDictionary(
			"nouns", NOUN_DICTIONARY_INDEX)

		# Word-frequency dictionary, opened with the index from its own module.
		freq_module = wordfreqdictionaryclass
		self.wordfreq = freq_module.wordfreqDictionary(
			'wordfreq', freq_module.wordfreq_DICTIONARY_INDEX)

		# Lookup tables, both empty at construction time.
		self.Table_affix_INDEX = {}
		self.NOUN_DICTIONARY_STAMP = {}

		# Debug flag supplied by the caller.
		self.debug = debug
예제 #4
0
	def __init__(self, debug=False):
		"""Create the affix stemmers, the verb dictionary and the verb cache.

		Two ``ArabicLightStemmer`` instances are built and configured from
		``stem_verb_const``: one from the ``COMP_*`` constants and one from
		the ``CONJ_*`` constants.  A regular expression matching any one of
		ALEF, YEH, WAW, ALEF MAKSURA or SHADDA is compiled once and kept on
		``self.VerbSTAMP_pat``.

		Args:
			debug: stored unchanged on ``self.debug``.
		"""
		# Stemmer for enclitics and proclitics (COMP_* settings).
		comp = tashaphyne.stemming.ArabicLightStemmer()
		self.compStemmer = comp
		consts = stem_verb_const
		comp.set_infix_letters(consts.COMP_INFIX_LETTERS)
		comp.set_prefix_letters(consts.COMP_PREFIX_LETTERS)
		comp.set_suffix_letters(consts.COMP_SUFFIX_LETTERS)
		comp.set_max_prefix_length(consts.COMP_MAX_PREFIX)
		comp.set_max_suffix_length(consts.COMP_MAX_SUFFIX)
		comp.set_min_stem_length(consts.COMP_MIN_STEM)
		comp.set_prefix_list(consts.COMP_PREFIX_LIST)
		comp.set_suffix_list(consts.COMP_SUFFIX_LIST)

		# Stemmer for conjugated verbs (CONJ_* settings).
		conj = tashaphyne.stemming.ArabicLightStemmer()
		self.conjStemmer = conj
		conj.set_infix_letters(consts.CONJ_INFIX_LETTERS)
		conj.set_prefix_letters(consts.CONJ_PREFIX_LETTERS)
		conj.set_suffix_letters(consts.CONJ_SUFFIX_LETTERS)
		conj.set_max_prefix_length(consts.CONJ_MAX_PREFIX)
		conj.set_max_suffix_length(consts.CONJ_MAX_SUFFIX)
		conj.set_min_stem_length(consts.CONJ_MIN_STEM)
		conj.set_prefix_list(consts.CONJ_PREFIX_LIST)
		conj.set_suffix_list(consts.CONJ_SUFFIX_LIST)

		# Enable the last mark (Harakat Al-I3rab).
		self.allowSyntaxLastMark = True

		# Debug flag supplied by the caller.
		self.debug = debug

		# Verb cache; starts with a single empty 'verb' bucket.
		self.cacheVerb = {'verb': {}}

		# Verb dictionary; only the dictionary name is passed.
		self.verbDictionary = arabicdictionary.arabicDictionary("verbs")

		# Character class over the five letters/marks listed below.
		stamp_chars = (
			araby.ALEF,
			araby.YEH,
			araby.WAW,
			araby.ALEF_MAKSURA,
			araby.SHADDA,
		)
		self.VerbSTAMP_pat = re.compile(u"[%s%s%s%s%s]" % stamp_chars, re.UNICODE)
예제 #5
0
	def __init__(self, debug=False):
		"""Initialise the verb stemmer's helper objects.

		Builds one ``ArabicLightStemmer`` from the ``COMP_*`` constants of
		``stem_verb_const`` and a second one from its ``CONJ_*`` constants,
		opens the "verbs" dictionary, creates the verb cache and compiles a
		regular expression matching any one of ALEF, YEH, WAW, ALEF MAKSURA
		or SHADDA.

		Args:
			debug: value stored on ``self.debug``.
		"""
		# Stemmer for enclitics and proclitics.
		clitic_stemmer = tashaphyne.stemming.ArabicLightStemmer()
		self.compStemmer = clitic_stemmer
		settings = stem_verb_const
		clitic_stemmer.set_infix_letters(settings.COMP_INFIX_LETTERS)
		clitic_stemmer.set_prefix_letters(settings.COMP_PREFIX_LETTERS)
		clitic_stemmer.set_suffix_letters(settings.COMP_SUFFIX_LETTERS)
		clitic_stemmer.set_max_prefix_length(settings.COMP_MAX_PREFIX)
		clitic_stemmer.set_max_suffix_length(settings.COMP_MAX_SUFFIX)
		clitic_stemmer.set_min_stem_length(settings.COMP_MIN_STEM)
		clitic_stemmer.set_prefix_list(settings.COMP_PREFIX_LIST)
		clitic_stemmer.set_suffix_list(settings.COMP_SUFFIX_LIST)

		# Stemmer for conjugated verbs.
		verb_stemmer = tashaphyne.stemming.ArabicLightStemmer()
		self.conjStemmer = verb_stemmer
		verb_stemmer.set_infix_letters(settings.CONJ_INFIX_LETTERS)
		verb_stemmer.set_prefix_letters(settings.CONJ_PREFIX_LETTERS)
		verb_stemmer.set_suffix_letters(settings.CONJ_SUFFIX_LETTERS)
		verb_stemmer.set_max_prefix_length(settings.CONJ_MAX_PREFIX)
		verb_stemmer.set_max_suffix_length(settings.CONJ_MAX_SUFFIX)
		verb_stemmer.set_min_stem_length(settings.CONJ_MIN_STEM)
		verb_stemmer.set_prefix_list(settings.CONJ_PREFIX_LIST)
		verb_stemmer.set_suffix_list(settings.CONJ_SUFFIX_LIST)

		# The last mark (Harakat Al-I3rab) is enabled by default.
		self.allowSyntaxLastMark = True

		# Debug flag, kept as given by the caller.
		self.debug = debug

		# Verb cache with one empty 'verb' bucket.
		self.cacheVerb = {'verb': {}}

		# Verb dictionary, opened by name only.
		self.verbDictionary = arabicdictionary.arabicDictionary("verbs")

		# Compile the five-character class once per instance.
		stamp_chars = (
			araby.ALEF,
			araby.YEH,
			araby.WAW,
			araby.ALEF_MAKSURA,
			araby.SHADDA,
		)
		self.VerbSTAMP_pat = re.compile(u"[%s%s%s%s%s]" % stamp_chars, re.UNICODE)
예제 #6
0
    def __init__(self, debug=False):
        """Create the affix stemmers, dictionaries and empty index tables.

        Two ``ArabicLightStemmer`` instances are configured from the
        ``COMP_*`` and ``CONJ_*`` constants of ``stem_noun_const``.  The noun
        dictionary and the word-frequency dictionary are then opened with
        their respective index objects.

        Args:
            debug: stored unchanged on ``self.debug``.
        """
        # Stemmer for enclitics and proclitics (COMP_* settings).
        comp = tashaphyne.stemming.ArabicLightStemmer()
        self.compStemmer = comp
        consts = stem_noun_const
        comp.set_infix_letters(consts.COMP_INFIX_LETTERS)
        comp.set_prefix_letters(consts.COMP_PREFIX_LETTERS)
        comp.set_suffix_letters(consts.COMP_SUFFIX_LETTERS)
        comp.set_max_prefix_length(consts.COMP_MAX_PREFIX)
        comp.set_max_suffix_length(consts.COMP_MAX_SUFFIX)
        comp.set_min_stem_length(consts.COMP_MIN_STEM)
        comp.set_prefix_list(consts.COMP_PREFIX_LIST)
        comp.set_suffix_list(consts.COMP_SUFFIX_LIST)

        # Stemmer configured from the CONJ_* constants
        # (presumably conjugation-level affixes -- TODO confirm).
        conj = tashaphyne.stemming.ArabicLightStemmer()
        self.conjStemmer = conj
        conj.set_infix_letters(consts.CONJ_INFIX_LETTERS)
        conj.set_prefix_letters(consts.CONJ_PREFIX_LETTERS)
        conj.set_suffix_letters(consts.CONJ_SUFFIX_LETTERS)
        conj.set_max_prefix_length(consts.CONJ_MAX_PREFIX)
        conj.set_max_suffix_length(consts.CONJ_MAX_SUFFIX)
        conj.set_min_stem_length(consts.CONJ_MIN_STEM)
        conj.set_prefix_list(consts.CONJ_PREFIX_LIST)
        conj.set_suffix_list(consts.CONJ_SUFFIX_LIST)

        # Noun dictionary, opened with NOUN_DICTIONARY_INDEX.
        noun_dict_factory = arabicdictionary.arabicDictionary
        self.nounDictionary = noun_dict_factory("nouns", NOUN_DICTIONARY_INDEX)

        # Word-frequency dictionary, opened with the index from its own module.
        freq_factory = wordfreqdictionaryclass.wordfreqDictionary
        freq_index = wordfreqdictionaryclass.wordfreq_DICTIONARY_INDEX
        self.wordfreq = freq_factory('wordfreq', freq_index)

        # Lookup tables, both empty at construction time.
        self.Table_affix_INDEX = {}
        self.NOUN_DICTIONARY_STAMP = {}

        # Debug flag supplied by the caller.
        self.debug = debug