Example #1
    def lookup(self, normalized):
        """
		Look up all word forms for the given normalized word in the dictionary.
		@param normalized: the normalized word.
		@type normalized: unicode.
		@return: list of dictionary entry IDs.
		@rtype: list.
		"""
        idList = []
        normword = araby.normalizeHamza(normalized)
        # print "###", normword.encode('utf8');

        sql = u"select id FROM %s WHERE normalized='%s'" % (self.tableName, normword)
        try:
            self.cursor.execute(sql)
            if self.cursor:
                for row in self.cursor:
                    idList.append(row[0])
            return idList
        except Exception:
            return []
Example #2
	def lookup(self,normalized):
		"""
		Look up all word forms for the given normalized word in the dictionary.
		@param normalized: the normalized word.
		@type normalized: unicode.
		@return: list of dictionary entries.
		@rtype: list.
		"""
		idList=[];
		normword=araby.normalizeHamza(normalized)
		#print "###", normword.encode('utf8');

		sql = u"select * FROM %s WHERE normalized='%s'"%(self.tableName,normword);
		try:
			self.cursor.execute(sql);
			if self.cursor:
				# return self.curser.fetchall();
				for row in self.cursor: idList.append(row);
			return idList;
		except Exception:
			return [];
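
Both lookup variants above interpolate the normalized word straight into the SQL string, which breaks as soon as the word contains a quote character. Below is a minimal sketch of the same lookup with a bound parameter instead, assuming the driver is sqlite3 (so ? placeholders are accepted) and the same self.tableName / self.cursor attributes; lookup_param is a hypothetical name, and the table name must still come from trusted configuration because identifiers cannot be bound:

    def lookup_param(self, normalized):
        """
        Hypothetical variant of lookup() that binds the word as a query parameter.
        """
        normword = araby.normalizeHamza(normalized)
        # only values can be bound; the table name is still formatted in and
        # is assumed to come from trusted configuration
        sql = u"SELECT id FROM %s WHERE normalized = ?" % self.tableName
        try:
            self.cursor.execute(sql, (normword,))
            return [row[0] for row in self.cursor.fetchall()]
        except Exception:
            return []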
Example #3
	def verbStamp(self, word):
		"""
		generate a stamp for a verb;
		the verb stamp differs from the word stamp by hamza normalization.
		Remove all letters which can change form in the word:
		- ALEF, 
		- YEH, 
		- WAW, 
		- ALEF_MAKSURA
		- SHADDA
		@return: stamped word
		"""
		word=araby.stripTashkeel(word);
		#The vowels are stripped in the stamp function
		word=araby.normalizeHamza(word);
		if word.startswith(araby.HAMZA):
			#strip The first hamza
			word=word[1:];
		# strip the last letter if it is doubled
		if word[-1:]== word[-2:-1]:
			word=word[:-1];
		return self.VerbSTAMP_pat.sub('', word)
Example #4
    def verbStamp(self, word):
        """
		generate a stamp for a verb;
		the verb stamp differs from the word stamp by hamza normalization.
		Remove all letters which can change form in the word:
		- ALEF, 
		- YEH, 
		- WAW, 
		- ALEF_MAKSURA
		- SHADDA
		@return: stamped word
		"""
        word = araby.stripTashkeel(word)
        #The vowels are stripped in the stamp function
        word = araby.normalizeHamza(word)
        if word.startswith(araby.HAMZA):
            #strip The first hamza
            word = word[1:]
        # strip the last letter if it is doubled
        if word[-1:] == word[-2:-1]:
            word = word[:-1]
        return self.VerbSTAMP_pat.sub('', word)
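
Examples #3 and #4 are the same verbStamp method, differing only in indentation; the class attribute self.VerbSTAMP_pat they rely on is not shown. Below is a standalone sketch of the same stamping idea, assuming araby is pyarabic.araby from a release that still exposes the camelCase helpers used above (stripTashkeel, normalizeHamza) together with its letter constants; VERB_STAMP_PAT and verb_stamp are hypothetical stand-ins, not part of the original class:

import re
import pyarabic.araby as araby

# the letters listed in the docstring above as removable from the stamp
VERB_STAMP_PAT = re.compile(u"|".join([araby.ALEF, araby.YEH, araby.WAW,
                                       araby.ALEF_MAKSURA, araby.SHADDA]),
                            re.UNICODE)

def verb_stamp(word):
    """Standalone illustration of the verb stamp computation."""
    word = araby.stripTashkeel(word)    # drop the vowels (tashkeel)
    word = araby.normalizeHamza(word)   # unify the hamza forms
    if word.startswith(araby.HAMZA):
        word = word[1:]                 # drop a leading hamza
    if word[-1:] == word[-2:-1]:
        word = word[:-1]                # drop a doubled final letter
    return VERB_STAMP_PAT.sub(u'', word)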
Example #5
    def steming_second_level(self, noun, noun2, procletic, encletic):
        """
		Analyze word morphologically by stemming the conjugation affixes.
		@param noun: the input noun.
		@type noun: unicode.
		@param noun2: the noun stemmed from syntactic affixes.
		@type noun2: unicode.
		@param procletic: the syntactic prefix extracted in the first stage.
		@type procletic: unicode.
		@param encletic: the syntactic suffix extracted in the first stage.
		@type encletic: unicode.
		@return: list of dictionaries of analyzed words with tags.
		@rtype: list.
		"""
        detailed_result = []
        #segment the conjugated noun
        list_seg_conj = self.conjStemmer.segment(noun2)
        # verify affix compatibility
        list_seg_conj = self.verify_affix(
            noun2, list_seg_conj, stem_noun_const.NOMINAL_CONJUGATION_AFFIX)
        # add vocalized forms of suffixes
        list_seg_conj_voc = []
        for seg_conj in list_seg_conj:
            prefix_conj = noun2[:seg_conj[0]]
            stem_conj = noun2[seg_conj[0]:seg_conj[1]]
            suffix_conj = noun2[seg_conj[1]:]
            affix_conj = prefix_conj + '-' + suffix_conj
            # get all vocalized forms of suffixes
            for vocalized_suffix in stem_noun_const.CONJ_SUFFIX_LIST_TAGS[
                    suffix_conj]['vocalized']:
                # if u'تنوين' not in stem_noun_const.CONJ_SUFFIX_LIST_TAGS[vocalized_suffix]['tags']:
                seg_conj_voc = {
                    'prefix': '',
                    'suffix': vocalized_suffix,
                    'stem': stem_conj
                }
                # verify compatibility between procletics and the affix
                if (self.is_compatible_proaffix_affix(procletic, encletic,
                                                      vocalized_suffix)):
                    # verify the existence of a noun stamp in the dictionary
                    # if self.NOUN_DICTIONARY_STAMP.has_key(stamp):
                    # list_seg_conj2.append(seg_conj)
                    list_seg_conj_voc.append(seg_conj_voc)

        list_seg_conj = list_seg_conj_voc
        for seg_conj in list_seg_conj:
            prefix_conj = seg_conj['prefix']
            stem_conj = seg_conj['stem']
            suffix_conj = seg_conj['suffix']
            has_plural_suffix = (
                (u"جمع"
                 in stem_noun_const.CONJ_SUFFIX_LIST_TAGS[suffix_conj]['tags'])
                or
                (u"مثنى"
                 in stem_noun_const.CONJ_SUFFIX_LIST_TAGS[suffix_conj]['tags'])
            )
            #print "has_plural", has_plural_suffix;
            affix_conj = '-'.join([prefix_conj, suffix_conj])
            # normalize hamza before guessing different origins
            stem_conj = araby.normalizeHamza(stem_conj)
            if self.debug:
                print "*\t", "-".join(
                    [str(len(stem_conj)), prefix_conj, stem_conj,
                     suffix_conj]).encode("utf8")
            # generate possible stems
            # add stripped letters to the stem to constitute possible noun list
            possible_noun_list = self.getStemVariants(stem_conj, prefix_conj,
                                                      suffix_conj)
            if self.debug:
                print "\tpossible original nouns:  ", "\t".join(
                    possible_noun_list).encode('utf8')
            # search the noun in the dictionary
            # we can return the tashkeel
            infnoun_form_list = []
            for infnoun in possible_noun_list:
                # get the noun and get all its forms from the dict
                # if the noun has plural suffix, don't look up in broken plural dictionary
                infnoun_foundL = self.nounDictionary.lookup(infnoun)
                #infnoun_found=self.find_nouns_in_dictionary(infnoun,has_plural_suffix);
                ##							listsingle=self.find_broken_plural(infnoun);
                ##							print ' *****','-'.join(listsingle).encode('utf8')
                if len(infnoun_foundL) > 0:
                    if self.debug: print "\t in dict", infnoun.encode('utf8')
                else:
                    if self.debug:
                        print infnoun.encode('utf8'), "not found in dictionary"
                infnoun_form_list += infnoun_foundL
            for id in infnoun_form_list:
                noun_tuple = self.nounDictionary.getEntryById(id)
                infnoun = noun_tuple['vocalized']
                # affix_tags contains the prefix and suffix tags
                affix_tags  =  stem_noun_const.COMP_PREFIX_LIST_TAGS[procletic]['tags'] \
                    +stem_noun_const.COMP_SUFFIX_LIST_TAGS[encletic]['tags'] \
                    +stem_noun_const.CONJ_SUFFIX_LIST_TAGS[suffix_conj]['tags']
                #test if the given word from the dictionary accepts the tags given by the affixes
                # دراسة توافق الزوائد مع خصائص الاسم،
                # مثلا هل يقبل الاسم التأنيث.
                # if not self.validateTags(noun_tuple, affix_tags, procletic, encletic, suffix_conj):

                if self.validateTags(noun_tuple, affix_tags, procletic,
                                     encletic, suffix_conj):
                    # if the result vocalized noun is not the same length
                    vocalized = self.vocalize(infnoun, procletic, prefix_conj,
                                              suffix_conj, encletic)
                    # the noun can have some harakat or shadda, so we must remove all tashkeel and compare
                    # vocalized_nm=araby.stripTashkeel(vocalized);
                    # noun_nm=araby.stripTashkeel(noun);

                    original = noun_tuple['original']
                    wordtype = noun_tuple['wordtype']
                    #add some tags from the dictionary entry, such as mamnou3 min sarf and broken plural
                    originalTags = []
                    if noun_tuple['mamnou3_sarf'] == u"ممنوع من الصرف":
                        originalTags.append(u"ممنوع من الصرف")
                    if noun_tuple['number'] == u"جمع تكسير":
                        originalTags.append(u"جمع تكسير")
                        # affix_tags+=(,);
                    detailed_result.append(
                        stemmedword.stemmedWord({
                            'word': noun,
                            'procletic': procletic,
                            'encletic': encletic,
                            'prefix': prefix_conj,
                            'suffix': suffix_conj,
                            'stem': stem_conj,
                            'original': infnoun,  #original,
                            'vocalized': vocalized,
                            'tags': u':'.join(affix_tags),
                            'type': u':'.join(['Noun', wordtype]),  #'Noun',
                            'root': '',
                            'template': '',
                            'freq': 'freqnoun',  # to note the frequency type
                            'originaltags': u':'.join(originalTags),
                            'syntax': '',
                        }))
        return detailed_result
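
In this and the following examples, conjStemmer.segment(noun2) is expected to yield (start, end) index pairs that delimit the candidate stem, which the loop then slices into prefix, stem and suffix. A toy illustration of that slicing, with a hypothetical segmentation of معلمون ("teachers") into the stem معلم plus the plural suffix ون:

# hypothetical segmentation result, for illustration only
noun2 = u"معلمون"
seg_conj = (0, 4)
prefix_conj = noun2[:seg_conj[0]]            # u""
stem_conj = noun2[seg_conj[0]:seg_conj[1]]   # u"معلم"
suffix_conj = noun2[seg_conj[1]:]            # u"ون"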
Example #6
    def steming_second_level(self, noun, noun2, procletic, encletic,
                             encletic_nm):
        """
		Analyze word morphologically by stemming the conjugation affixes.
		@param noun: the input noun.
		@type noun: unicode.
		@param noun2: the noun stemmed from syntactic affixes.
		@type noun2: unicode.
		@param procletic: the syntactic prefix extracted in the first stage.
		@type procletic: unicode.
		@param encletic: the syntactic suffix extracted in the first stage.
		@type encletic: unicode.
		@param encletic_nm: the syntactic suffix extracted in the first stage (not vocalized).
		@type encletic_nm: unicode.
		@return: list of dictionaries of analyzed words with tags.
		@rtype: list.
		"""
        detailed_result = []
        #segment the conjugated noun
        list_seg_conj = self.conjStemmer.segment(noun2)
        # verify affix compatibility
        list_seg_conj = self.verify_affix(
            noun2, list_seg_conj, stem_noun_const.NOMINAL_CONJUGATION_AFFIX)
        # add vocalized forms of suffixes
        # and create the real affixes from the word
        list_seg_conj_voc = []
        for seg_conj in list_seg_conj:
            stem_conj = noun2[seg_conj[0]:seg_conj[1]]
            suffix_conj_nm = noun2[seg_conj[1]:]

            # normalize hamza before guessing different origins
            stem_conj = araby.normalizeHamza(stem_conj)

            # generate possible stems
            # add stripped letters to the stem to constitute possible noun list
            possible_noun_list = self.getStemVariants(stem_conj,
                                                      suffix_conj_nm)

            # search the noun in the dictionary
            # we can return the tashkeel
            infnoun_form_list = []
            for infnoun in set(possible_noun_list):
                # get the noun and get all its forms from the dict
                # if the noun has plural suffix, don't look up in broken plural dictionary
                if infnoun not in self.CacheDictSearch:
                    infnoun_foundL = self.nounDictionary.lookup(infnoun)
                    self.CacheDictSearch[infnoun] = self.createDictWord(
                        infnoun_foundL)
                else:
                    infnoun_foundL = self.CacheDictSearch[infnoun]
                infnoun_form_list.extend(infnoun_foundL)
            #print "len loooked up noun in dictionnary ",len(infnoun_form_list), len(set(infnoun_form_list));
            for noun_tuple in infnoun_form_list:
                # noun_tuple=self.nounDictionary.getEntryById(id);
                infnoun = noun_tuple['vocalized']
                # affix_tags contains the prefix and suffix tags
                affix_tags  =  stem_noun_const.COMP_PREFIX_LIST_TAGS[procletic]['tags'] \
                    +stem_noun_const.COMP_SUFFIX_LIST_TAGS[encletic_nm]['tags'] \
                    +stem_noun_const.CONJ_SUFFIX_LIST_TAGS[suffix_conj_nm]['tags']
                #test if the given word from the dictionary accepts the tags given by the affixes
                # دراسة توافق الزوائد مع خصائص الاسم،
                # مثلا هل يقبل الاسم التأنيث.

                if self.validateTags(noun_tuple, affix_tags, procletic,
                                     encletic_nm, suffix_conj_nm):
                    ## get all vocalized forms of suffixes
                    for vocalized_encletic in stem_noun_const.COMP_SUFFIX_LIST_TAGS[
                            encletic_nm]['vocalized']:
                        for vocalized_suffix in stem_noun_const.CONJ_SUFFIX_LIST_TAGS[
                                suffix_conj_nm]['vocalized']:
                            ## verify compatibility between procletics and affix
                            if (self.is_compatible_proaffix_affix(
                                    noun_tuple, procletic, vocalized_encletic,
                                    vocalized_suffix)):
                                vocalized, semiVocalized = self.vocalize(
                                    infnoun, procletic, vocalized_suffix,
                                    vocalized_encletic)
                                vocalized_affix_tags  =  stem_noun_const.COMP_PREFIX_LIST_TAGS[procletic]['tags'] \
                                    +stem_noun_const.COMP_SUFFIX_LIST_TAGS[vocalized_encletic]['tags'] \
                                    +stem_noun_const.CONJ_SUFFIX_LIST_TAGS[vocalized_suffix]['tags']

                                #add some tags from the dictionary entry, such as mamnou3 min sarf and broken plural
                                originalTags = []
                                if noun_tuple['mamnou3_sarf'] == u"ممنوع من الصرف":
                                    originalTags.append(u"ممنوع من الصرف")
                                if noun_tuple['number'] == u"جمع تكسير":
                                    originalTags.append(u"جمع تكسير")
                                    # affix_tags+=(, );
                                detailed_result.append(
                                    wordCase.wordCase({
                                        'word': noun,
                                        #~ 'affix': analex_const.AffixTuple((procletic=procletic, encletic=vocalized_encletic, prefix='', suffix=vocalized_suffix)),
                                        'affix': (procletic, '', vocalized_suffix, vocalized_encletic),
                                        #~ 'procletic': ,
                                        #~ 'encletic':  ,
                                        #~ 'prefix':    '',
                                        #~ 'suffix':    vocalized_suffix,
                                        'stem': stem_conj,
                                        'original': infnoun,  #original,
                                        'vocalized': vocalized,
                                        'semivocalized': semiVocalized,
                                        'tags': u':'.join(vocalized_affix_tags),
                                        'type': u':'.join(['Noun', noun_tuple['wordtype']]),  #'Noun',
                                        #~ 'root':      '',
                                        #~ 'template':  '',
                                        'freq': 'freqnoun',  # to note the frequency type
                                        'originaltags': u':'.join(originalTags),
                                        'syntax': '',
                                    }))
        return detailed_result
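
Compared with Example #5, this variant queries the dictionary only once per candidate stem: results are memoized in self.CacheDictSearch, keyed by the normalized stem. A hypothetical helper that factors that pattern out (cached_noun_lookup is not part of the original class and assumes CacheDictSearch is a plain dict created in __init__):

def cached_noun_lookup(stemmer, infnoun):
    """Query the noun dictionary once per candidate stem and reuse the result."""
    if infnoun not in stemmer.CacheDictSearch:
        found = stemmer.nounDictionary.lookup(infnoun)
        stemmer.CacheDictSearch[infnoun] = stemmer.createDictWord(found)
    return stemmer.CacheDictSearch[infnoun]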
Example #7
	def steming_second_level(self, noun, noun2, procletic, encletic, encletic_nm):
		"""
		Analyze word morphologically by stemming the conjugation affixes.
		@param noun: the input noun.
		@type noun: unicode.
		@param noun2: the noun stemmed from syntactic affixes.
		@type noun2: unicode.
		@param procletic: the syntactic prefix extracted in the first stage.
		@type procletic: unicode.
		@param encletic: the syntactic suffix extracted in the first stage.
		@type encletic: unicode.
		@param encletic_nm: the syntactic suffix extracted in the first stage (not vocalized).
		@type encletic_nm: unicode.
		@return: list of dictionaries of analyzed words with tags.
		@rtype: list.
		"""	
		detailed_result=[];
		#segment the conjugated noun
		list_seg_conj = self.conjStemmer.segment(noun2);
		# verify affix compatibility
		list_seg_conj = self.verify_affix(noun2, list_seg_conj, stem_noun_const.NOMINAL_CONJUGATION_AFFIX);
		# add vocalized forms of suffixes
		# and create the real affixes from the word
		list_seg_conj_voc=[];
		for seg_conj in list_seg_conj:
			stem_conj   = noun2[seg_conj[0]:seg_conj[1]]
			suffix_conj_nm = noun2[seg_conj[1]:]

			# normalize hamza before guessing different origins
			stem_conj = araby.normalizeHamza(stem_conj)

			# generate possible stems
			# add stripped letters to the stem to constitute possible noun list
			possible_noun_list=self.getStemVariants(stem_conj, suffix_conj_nm);

			# search the noun in the dictionary
			# we can return the tashkeel
			infnoun_form_list=[];
			for infnoun in set(possible_noun_list):
				# get the noun and get all its forms from the dict
				# if the noun has plural suffix, don't look up in broken plural dictionary
				if infnoun not in self.CacheDictSearch:
					infnoun_foundL = self.nounDictionary.lookup(infnoun);
					self.CacheDictSearch[infnoun]  = self.createDictWord(infnoun_foundL);
				else: 
					infnoun_foundL = self.CacheDictSearch[infnoun]  ;					
				infnoun_form_list.extend(infnoun_foundL);
			#print "len loooked up noun in dictionnary ",len(infnoun_form_list), len(set(infnoun_form_list));
			for noun_tuple in infnoun_form_list:
				# noun_tuple=self.nounDictionary.getEntryById(id);
				infnoun = noun_tuple['vocalized'];
				# affix_tags contains the prefix and suffix tags
				affix_tags  =  stem_noun_const.COMP_PREFIX_LIST_TAGS[procletic]['tags'] \
								+stem_noun_const.COMP_SUFFIX_LIST_TAGS[encletic_nm]['tags'] \
								+stem_noun_const.CONJ_SUFFIX_LIST_TAGS[suffix_conj_nm]['tags']
				#test if the given word from the dictionary accepts the tags given by the affixes
				# دراسة توافق الزوائد مع خصائص الاسم،
				# مثلا هل يقبل الاسم التأنيث.
	
				if self.validateTags(noun_tuple, affix_tags, procletic, encletic_nm, suffix_conj_nm):
					## get all vocalized forms of suffixes
					for vocalized_encletic in stem_noun_const.COMP_SUFFIX_LIST_TAGS[encletic_nm]['vocalized']:
						for vocalized_suffix in stem_noun_const.CONJ_SUFFIX_LIST_TAGS[suffix_conj_nm]['vocalized']:
							## verify compatibility between procletics and affix
							if (self.is_compatible_proaffix_affix(noun_tuple, procletic, vocalized_encletic, vocalized_suffix)):
								vocalized, semiVocalized = self.vocalize(infnoun, procletic,  vocalized_suffix, vocalized_encletic);
								vocalized_affix_tags  =  stem_noun_const.COMP_PREFIX_LIST_TAGS[procletic]['tags'] \
												+stem_noun_const.COMP_SUFFIX_LIST_TAGS[vocalized_encletic]['tags'] \
												+stem_noun_const.CONJ_SUFFIX_LIST_TAGS[vocalized_suffix]['tags']								
								
								#add some tags from the dictionary entry, such as mamnou3 min sarf and broken plural
								originalTags=[];
								if noun_tuple['mamnou3_sarf']==u"ممنوع من الصرف":
									originalTags.append(u"ممنوع من الصرف");
								if noun_tuple['number']==u"جمع تكسير":
									originalTags.append(u"جمع تكسير");						
									# affix_tags+=(, );
								detailed_result.append(wordCase.wordCase({
								'word':noun, 
								#~ 'affix': analex_const.AffixTuple((procletic=procletic, encletic=vocalized_encletic, prefix='', suffix=vocalized_suffix)),								
								'affix': (procletic,  '', vocalized_suffix, vocalized_encletic),								
								#~ 'procletic': , 
								#~ 'encletic':  , 
								#~ 'prefix':    '', 
								#~ 'suffix':    vocalized_suffix, 
								'stem':      stem_conj, 
								'original':  infnoun, #original, 
								'vocalized': vocalized, 
								'semivocalized':semiVocalized,
								'tags':      u':'.join(vocalized_affix_tags), 
								'type':      u':'.join(['Noun', noun_tuple['wordtype']]), #'Noun', 
								#~ 'root':      '', 
								#~ 'template':  '', 
								'freq':'freqnoun', # to note the frequency type 
								'originaltags':u':'.join(originalTags), 
								'syntax':'', 
								}));
		return detailed_result;
Example #8
	def steming_second_level(self,noun,noun2,procletic,encletic):
		"""
		Analyze word morphologically by stemming the conjugation affixes.
		@param noun: the input noun.
		@type noun: unicode.
		@param noun2: the noun stemmed from syntactic affixes.
		@type noun2: unicode.
		@param procletic: the syntactic prefix extracted in the first stage.
		@type procletic: unicode.
		@param encletic: the syntactic suffix extracted in the first stage.
		@type encletic: unicode.
		@return: list of dictionaries of analyzed words with tags.
		@rtype: list.
		"""	
		detailed_result=[];
		#segment the conjugated noun
		list_seg_conj=self.conjStemmer.segment(noun2);
		# verify affix compatibility
		list_seg_conj=self.verify_affix(noun2,list_seg_conj,stem_noun_const.NOMINAL_CONJUGATION_AFFIX);
		# add vocalized forms of suffixes
		list_seg_conj_voc=[];
		for seg_conj in list_seg_conj:
			prefix_conj=noun2[:seg_conj[0]];
			stem_conj=noun2[seg_conj[0]:seg_conj[1]]
			suffix_conj=noun2[seg_conj[1]:]
			affix_conj=prefix_conj+'-'+suffix_conj;
			# get all vocalized forms of suffixes
			for vocalized_suffix in stem_noun_const.CONJ_SUFFIX_LIST_TAGS[suffix_conj]['vocalized']:
				# if u'تنوين' not in stem_noun_const.CONJ_SUFFIX_LIST_TAGS[vocalized_suffix]['tags']:
				seg_conj_voc={'prefix':'','suffix':vocalized_suffix,'stem':stem_conj}
				# verify compatibility between procletics and the affix
				if (self.is_compatible_proaffix_affix(procletic, encletic, vocalized_suffix)):
				# verify the existence of a noun stamp in the dictionary
				# if self.NOUN_DICTIONARY_STAMP.has_key(stamp):
					# list_seg_conj2.append(seg_conj)
					list_seg_conj_voc.append(seg_conj_voc)

		list_seg_conj=list_seg_conj_voc;
		for seg_conj in list_seg_conj:
			prefix_conj=seg_conj['prefix'];
			stem_conj=seg_conj['stem']
			suffix_conj=seg_conj['suffix']
			has_plural_suffix=((u"جمع" in stem_noun_const.CONJ_SUFFIX_LIST_TAGS[suffix_conj]['tags']) or( u"مثنى" in stem_noun_const.CONJ_SUFFIX_LIST_TAGS[suffix_conj]['tags']))
			#print "has_plural", has_plural_suffix;
			affix_conj='-'.join([prefix_conj,suffix_conj])
			# normalize hamza before guessing different origins
			stem_conj = araby.normalizeHamza(stem_conj)
			if self.debug:
				print "*\t", "-".join([str(len(stem_conj)),prefix_conj,stem_conj,suffix_conj]).encode("utf8") ;
			# generate possible stems
			# add stripped letters to the stem to constitute possible noun list
			possible_noun_list=self.getStemVariants(stem_conj,prefix_conj,suffix_conj);
			if self.debug:
				print "\tpossible original nouns:  ","\t".join(possible_noun_list).encode('utf8');
			# search the noun in the dictionary
			# we can return the tashkeel
			infnoun_form_list=[];
			for infnoun in possible_noun_list:
				# get the noun and get all its forms from the dict
				# if the noun has plural suffix, don't look up in broken plural dictionary
				infnoun_foundL=self.nounDictionary.lookup(infnoun);
				#infnoun_found=self.find_nouns_in_dictionary(infnoun,has_plural_suffix);
##							listsingle=self.find_broken_plural(infnoun);
##							print ' *****','-'.join(listsingle).encode('utf8')
				if len(infnoun_foundL)>0:
					if self.debug: print "\t in dict",infnoun.encode('utf8');
				else:
					if self.debug: print infnoun.encode('utf8'),"not found in dictionary"
				infnoun_form_list+=infnoun_foundL;
			for id in infnoun_form_list:
				noun_tuple=self.nounDictionary.getEntryById(id);
				infnoun=noun_tuple['vocalized'];
				# affix_tags contains the prefix and suffix tags
				affix_tags  =  stem_noun_const.COMP_PREFIX_LIST_TAGS[procletic]['tags'] \
								+stem_noun_const.COMP_SUFFIX_LIST_TAGS[encletic]['tags'] \
								+stem_noun_const.CONJ_SUFFIX_LIST_TAGS[suffix_conj]['tags']
				#test if the given word from the dictionary accepts the tags given by the affixes
				# دراسة توافق الزوائد مع خصائص الاسم،
				# مثلا هل يقبل الاسم التأنيث.
				# if not self.validateTags(noun_tuple, affix_tags, procletic, encletic, suffix_conj):
					
				if self.validateTags(noun_tuple, affix_tags, procletic, encletic, suffix_conj):
					# if the result vocalized noun is not the same length
					vocalized=self.vocalize(infnoun,procletic,prefix_conj,suffix_conj,encletic);
					# the noun can have some harakat or shadda, so we must remove all tashkeel and compare
					# vocalized_nm=araby.stripTashkeel(vocalized);
					# noun_nm=araby.stripTashkeel(noun);

					original=noun_tuple['original'];
					wordtype=noun_tuple['wordtype'];
					#add some tags from the dictionary entry, such as mamnou3 min sarf and broken plural
					originalTags=[];
					if noun_tuple['mamnou3_sarf']==u"ممنوع من الصرف":
						originalTags.append(u"ممنوع من الصرف");
					if noun_tuple['number']==u"جمع تكسير":
						originalTags.append(u"جمع تكسير");						
						# affix_tags+=(,);
					detailed_result.append(stemmedword.stemmedWord({
					'word':noun,
					'procletic':procletic,
					'encletic':encletic,
					'prefix':prefix_conj,
					'suffix':suffix_conj,
					'stem':stem_conj,
					'original':infnoun,#original,
					'vocalized':vocalized,
					'tags':u':'.join(affix_tags),
					'type':u':'.join(['Noun',wordtype]),#'Noun',
					'root':'',
					'template':'',
					'freq':'freqnoun', # to note the frequency type 
					'originaltags':u':'.join(originalTags),
					'syntax':'',
					}));
		return detailed_result;