コード例 #1
0
ファイル: stem_unknown.py プロジェクト: ATouhou/mishkal
	def vocalize(self, noun, proclitic, prefix, suffix, enclitic):
		"""
		Assemble the vocalized form of a noun from its stem and affixes.

		The result is the concatenation, in this order, of the vocalized
		proclitic, the prefix (unchanged), the adapted noun, the adapted
		suffix and the vocalized enclitic.

		@param noun: noun found in dictionary.
		@type noun: unicode.
		@param proclitic: first level prefix; used as a key of
			stem_noun_const.COMP_PREFIX_LIST_TAGS.
		@type proclitic: unicode.
		@param prefix: second level prefix.
		@type prefix: unicode.
		@param suffix: second level suffix.
		@type suffix: unicode.
		@param enclitic: first level suffix; used as a key of
			stem_noun_const.COMP_SUFFIX_LIST_TAGS.
		@type enclitic: unicode.
		@return: vocalized word.
		@rtype: unicode.
		"""
		# Each clitic takes the first vocalized form listed in its tag table.
		voc_enclitic = stem_noun_const.COMP_SUFFIX_LIST_TAGS[enclitic]["vocalized"][0]
		voc_proclitic = stem_noun_const.COMP_PREFIX_LIST_TAGS[proclitic]["vocalized"][0]

		# Drop a trailing haraka / tanwin from the stem.
		stem = noun
		if stem[-1:] in araby.HARAKAT:
			stem = stem[:-1]

		# When the proclitic ends with ALEF+LAM and the stem starts with a
		# sun letter, put a shadda on that first letter.
		ends_with_al = proclitic.endswith(araby.ALEF + araby.LAM)
		if ends_with_al and araby.isSun(stem[0]):
			stem = u''.join((stem[0], araby.SHADDA, stem[1:]))
			# The lam then loses its sukun.
			if voc_proclitic.endswith(araby.SUKUN):
				voc_proclitic = voc_proclitic[:-1]

		# Adapt the stem to the suffix first, then to the enclitic.
		for affix in (suffix, enclitic):
			stem = self.getWordVariant(stem, affix)

		# The suffix is passed as-is; no vocalization table is consulted for it.
		voc_suffix = self.getSuffixVariant(stem, suffix, enclitic)
		return ''.join((voc_proclitic, prefix, stem, voc_suffix, voc_enclitic))
コード例 #2
0
	def vocalize(self, noun, proclitic, suffix, enclitic):
		"""
		Assemble a noun with its affixes and build two surface forms.

		@param noun: noun found in dictionary.
		@type noun: unicode.
		@param proclitic: first level prefix; used as a key of
			stem_noun_const.COMP_PREFIX_LIST_TAGS.
		@type proclitic: unicode.
		@param suffix: second level suffix.
		@type suffix: unicode.
		@param enclitic: first level suffix; used as a key of
			stem_noun_const.COMP_SUFFIX_LIST_TAGS.
		@type enclitic: unicode.
		@return: a pair (vocalized word, vocalized word built with the
			suffix form that carries no I3rab mark).
		@rtype: tuple of (unicode, unicode).
		"""
		# Enclitic and proclitic each have a unique vocalization: take the
		# first entry of the table.
		voc_enclitic = stem_noun_const.COMP_SUFFIX_LIST_TAGS[enclitic]["vocalized"][0]
		proclitic_entry = stem_noun_const.COMP_PREFIX_LIST_TAGS[proclitic]
		voc_proclitic = proclitic_entry["vocalized"][0]

		# Drop a trailing haraka / tanwin from the stem.
		stem = noun
		if araby.isHaraka(stem[-1:]):
			stem = stem[:-1]
		# A fathatan that is not in final position (e.g. محتوًى) becomes a
		# plain fatha.
		stem = stem.replace(araby.FATHATAN, araby.FATHA)

		# If the proclitic is tagged as a definite article and the stem starts
		# with a sun letter, put a shadda on that first letter.
		is_definite = u'تعريف' in proclitic_entry["tags"]
		if is_definite and araby.isSun(stem[0]):
			stem = u''.join((stem[0], araby.SHADDA, stem[1:]))
			# The lam then loses its sukun.
			if voc_proclitic.endswith(araby.SUKUN):
				voc_proclitic = voc_proclitic[:-1]

		# Stems ending with special letters (teh marbuta, alef maksura, hamza)
		# change shape depending on what follows, e.g. مدرسة+ي = مدرست+ي.
		stem = self.getWordVariant(stem, suffix + enclitic)

		# The suffix likewise depends on the enclitic, e.g. مدرس+ة+ي = مدرس+ت+ي.
		# Two forms come back: the full one and one without the I3rab mark.
		voc_suffix, bare_suffix = self.getSuffixVariant(stem, suffix, enclitic)

		# The enclitic is adapted to the stem and the suffix it is joined to.
		voc_enclitic = self.getEncliticVariant(stem, voc_suffix, voc_enclitic)

		# Same word twice, differing only in which suffix form is used.
		without_irab = ''.join((voc_proclitic, stem, bare_suffix, voc_enclitic))
		with_irab = ''.join((voc_proclitic, stem, voc_suffix, voc_enclitic))
		return with_irab, without_irab
コード例 #3
0
ファイル: stem_noun.py プロジェクト: abougouffa/mishkal
	def vocalize(self, noun, proclitic, suffix, enclitic):
		"""
		Join the noun and its affixes; return the vocalized word in two forms.

		@param noun: noun found in dictionary.
		@type noun: unicode.
		@param proclitic: first level prefix (key of
			stem_noun_const.COMP_PREFIX_LIST_TAGS).
		@type proclitic: unicode.
		@param suffix: second level suffix.
		@type suffix: unicode.
		@param enclitic: first level suffix (key of
			stem_noun_const.COMP_SUFFIX_LIST_TAGS).
		@type enclitic: unicode.
		@return: (fully vocalized word, same word using the suffix variant
			without the I3rab mark).
		@rtype: tuple of (unicode, unicode).
		"""
		prefix_tags = stem_noun_const.COMP_PREFIX_LIST_TAGS
		suffix_tags = stem_noun_const.COMP_SUFFIX_LIST_TAGS

		# Clitics have a single vocalization each, stored first in the list.
		tail = suffix_tags[enclitic]["vocalized"][0]
		head = prefix_tags[proclitic]["vocalized"][0]

		# Remove a final haraka / tanwin from the noun.
		base = noun[:-1] if araby.isHaraka(noun[-1:]) else noun
		# Inner fathatan (as in محتوًى) is reduced to a single fatha.
		base = base.replace(araby.FATHATAN, araby.FATHA)

		# Definite-article proclitic followed by a sun letter: the first
		# letter of the noun takes a shadda and the lam drops its sukun.
		if u'تعريف' in prefix_tags[proclitic]["tags"] and araby.isSun(base[0]):
			base = u''.join([base[:1], araby.SHADDA, base[1:]])
			if head.endswith(araby.SUKUN):
				head = head[:-1]

		# Word variant for nouns ending in special letters (teh marbuta,
		# alef maksura, hamza), chosen from what follows: مدرسة+ي = مدرست+ي.
		base = self.getWordVariant(base, suffix + enclitic)

		# Suffix variant, chosen from the enclitic: مدرس+ة+ي = مدرس+ت+ي.
		middle, middle_no_irab = self.getSuffixVariant(base, suffix, enclitic)

		# Enclitic variant matching the noun and the suffix it is joined to.
		tail = self.getEncliticVariant(base, middle, tail)

		# Build the form without the I3rab mark, then the fully vocalized one.
		word_no_irab = ''.join([head, base, middle_no_irab, tail])
		word_full = ''.join([head, base, middle, tail])
		return word_full, word_no_irab
コード例 #4
0
    def vocalize(self, noun, proclitic, prefix, suffix, enclitic):
        """
        Build the vocalized form of a noun joined with its affixes.

        The pieces are concatenated as: vocalized proclitic, prefix
        (unchanged), adapted noun, adapted suffix, adapted enclitic.

        @param noun: noun found in dictionary.
        @type noun: unicode.
        @param proclitic: first level prefix; used as a key of
            stem_noun_const.COMP_PREFIX_LIST_TAGS.
        @type proclitic: unicode.
        @param prefix: second level prefix.
        @type prefix: unicode.
        @param suffix: second level suffix.
        @type suffix: unicode.
        @param enclitic: first level suffix; used as a key of
            stem_noun_const.COMP_SUFFIX_LIST_TAGS.
        @type enclitic: unicode.
        @return: vocalized word.
        @rtype: unicode.
        """
        # Each clitic takes the first vocalized form listed in its tag table.
        voc_enclitic = stem_noun_const.COMP_SUFFIX_LIST_TAGS[enclitic]["vocalized"][0]
        voc_proclitic = stem_noun_const.COMP_PREFIX_LIST_TAGS[proclitic]["vocalized"][0]

        # Drop a trailing haraka / tanwin from the stem.
        stem = noun
        if stem[-1:] in araby.HARAKAT:
            stem = stem[:-1]
        # Any remaining fathatan becomes a single fatha, e.g. محتوًى.
        stem = stem.replace(araby.FATHATAN, araby.FATHA)

        # Proclitic ending in ALEF+LAM or LAM+LAM, followed by a sun letter:
        # the first letter of the stem takes a shadda.
        article_endings = (araby.ALEF + araby.LAM, araby.LAM + araby.LAM)
        if proclitic.endswith(article_endings) and araby.isSun(stem[0]):
            stem = u''.join((stem[0], araby.SHADDA, stem[1:]))
            # The lam then loses its sukun.
            if voc_proclitic.endswith(araby.SUKUN):
                voc_proclitic = voc_proclitic[:-1]

        # Adapt the stem to everything that follows it (suffix + enclitic).
        stem = self.getWordVariant(stem, suffix + enclitic)
        # The suffix is passed as-is; no vocalization table is consulted for it.
        voc_suffix = self.getSuffixVariant(stem, suffix, enclitic)
        voc_enclitic = self.getEncliticVariant(stem, voc_suffix, voc_enclitic)

        pieces = (voc_proclitic, prefix, stem, voc_suffix, voc_enclitic)
        return ''.join(pieces)