예제 #1
0
파일: Rule.py 프로젝트: OpenEdition/bilbo
	def _sepFrontSpePunc(self, word):
		"""
		Split off special punctuation marks found at the start of the word.

		Special punctuation marks are the non-English marks listed as keys of
		'self.special'; they cannot be handled by regular expression.
		Each key is tested once, in the iteration order of 'self.special'.
		When the remaining string starts with the key, the key is removed from
		it and a new Word is built for the key (with the tag and feature names
		of 'word') and given the tag "c".

		Returns a tuple (separated, remainder): the list of Word objects that
		were created, and the string left after the removed marks.
		"""
		remainder = word.nom
		tags = word.listNomTag()
		feats = word.listNomFeature()
		separated = []

		for mark in self.special.keys() :
			# Only a mark sitting at the very beginning of the string counts
			if not remainder.startswith(mark) :
				continue
			puncWord = Word(mark, tags, feats)
			puncWord.addTag("c")
			separated.append(puncWord)
			remainder = remainder[len(mark):]

		return separated, remainder
예제 #2
0
파일: Rule.py 프로젝트: OpenEdition/bilbo
	def _sepFrontPunc(self, word):
		"""
		Separate punctuation marks at the front of the word.

		Every leading character of 'word.nom' that belongs to the punctuation
		set becomes its own Word, built with the tag and feature names of
		'word' and given the tag "c". Scanning stops at the first character
		that is not in the set.

		Returns a tuple (frontWords, rest): the list of created Word objects in
		order of appearance, and the string left after the leading marks.
		"""
		input_str = word.nom
		tagNames = word.listNomTag()
		featNames = word.listNomFeature()

		# The backslash is spelled as an explicit '\\'. The former '\*' was an
		# invalid escape sequence that only worked because Python keeps the
		# unknown backslash; the resulting set of characters is unchanged.
		allPunc = '.,():;{}[]!?#$%\\*+<=>@^_|~"' #exclude /
		if self.options.u :
			allPunc = allPunc[:-1] #drop the trailing double quote from the set

		frontWords = []
		cut = 0 #number of leading punctuation characters consumed
		for c in input_str :
			if c not in allPunc :
				break
			tmpWord = Word(c, tagNames, featNames) #create word for a punctuation mark
			tmpWord.addTag("c")
			frontWords.append(tmpWord)
			cut += 1

		return frontWords, input_str[cut:]