Python BreakIterator.createWordInstance 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: icu

클래스/타입: BreakIterator

메소드/함수: createWordInstance

hotexamples.com에서의 예제들: 14

Python BreakIterator.createWordInstance - 14개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python icu.BreakIterator.createWordInstance의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

createWordInstance(10)

createSentenceInstance(4)

createCharacterInstance(2)

setText(1)

예제 #1

파일 보기

파일: khmer_segment_icu.py 프로젝트: lychhayly/khmer_segment

def gen_khm_words(text: str) -> str:
    """Yield consecutive pieces of ``text`` split at ICU word boundaries.

    A word-level ``BreakIterator`` is created for ``Locale("km")`` and each
    slice of ``text`` between two successive boundaries is yielded in order.

    Note: this is a generator function; the ``str`` in the signature is the
    type of every yielded item.
    """
    breaker = BreakIterator.createWordInstance(Locale("km"))
    breaker.setText(text)
    previous = breaker.first()
    for boundary in breaker:
        yield text[previous:boundary]
        # The end of this piece is the start of the next one.
        previous = boundary

예제 #2

파일 보기

파일: pyicu.py 프로젝트: eveem/pythainlp

def _gen_words(text: str) -> str:
    """Yield the pieces of ``text`` that lie between ICU word boundaries.

    Uses a word-level ``BreakIterator`` built for ``Locale("th")``. This is a
    generator function; the ``str`` in the signature is the type of each
    yielded piece.
    """
    breaker = BreakIterator.createWordInstance(Locale("th"))
    breaker.setText(text)
    left = breaker.first()
    for right in breaker:
        piece = text[left:right]
        # Advance the left edge before handing the piece to the consumer.
        left = right
        yield piece

예제 #3

파일 보기

파일: pyicu.py 프로젝트: wannaphongcom/pythainlp

def _gen_words(text: str) -> str:
    """Generate successive slices of ``text`` delimited by ICU word breaks.

    The break positions come from a word-level ``BreakIterator`` created for
    ``Locale("th")``. Calling this function returns a generator; ``str`` in
    the signature is the type of the items it yields.
    """
    thai_locale = Locale("th")
    word_breaker = BreakIterator.createWordInstance(thai_locale)
    word_breaker.setText(text)
    begin = word_breaker.first()
    for stop in word_breaker:
        yield text[begin:stop]
        begin = stop

예제 #4

파일 보기

파일: utils.py 프로젝트: jsbien/tolejniczak-pdfautils

def divideIntoWords(txt, locale):
    """Return the word-boundary offsets ICU reports for ``txt``.

    Args:
        txt: Text to analyse; handed to ``BreakIterator.setText``.
        locale: Locale name passed to ``Locale.createFromName`` to select
            the word-break rules.

    Returns:
        A list with the offsets produced by iterating the word-level break
        iterator over ``txt``, in the order they were produced.
    """
    loc = Locale.createFromName(locale)
    bi = BreakIterator.createWordInstance(loc)
    bi.setText(txt)
    # Iterating the break iterator yields every boundary until it is
    # exhausted. This replaces the manual next()/StopIteration loop and does
    # not depend on the Python-2-only ``.next()`` method name.
    return list(bi)

예제 #5

파일 보기

 def _compute_icu_segmented(self):
     """
     Compute the ICU-segmented version of the line from ``self.unsegmented``.

     ``self.unsegmented`` must already have been computed before this is
     called.

     Sets two attributes:
       * ``self.icu_word_brkpoints`` -- ``0`` followed by every boundary
         yielded by a root-locale word ``BreakIterator`` over the text.
       * ``self.icu_segmented`` -- a leading ``"|"`` followed by each slice
         between consecutive break points, each slice followed by ``"|"``.
     """
     words_break_iterator = BreakIterator.createWordInstance(
         Locale.getRoot())
     words_break_iterator.setText(self.unsegmented)
     self.icu_word_brkpoints = [0]
     self.icu_word_brkpoints.extend(words_break_iterator)
     # Pair every break point with its successor and build the result with a
     # single join instead of repeated string concatenation in an index loop.
     pieces = [
         self.unsegmented[start:end] + "|"
         for start, end in zip(self.icu_word_brkpoints,
                               self.icu_word_brkpoints[1:])
     ]
     self.icu_segmented = "|" + "".join(pieces)

예제 #6

파일 보기

파일: page_normalizer.py 프로젝트: jsbien/pol

 def endElement(self, name):
     """Handle the end of an element and forward it downstream.

     When ``name`` is ``u"Unicode"``, the accumulated ``self.__uniText`` is
     split at ICU word boundaries, every token is passed through
     ``processToken``, and the concatenated result is sent to
     ``self.__downstream.characters`` before the end-of-element event is
     forwarded. For every other element only the end event is forwarded.

     Args:
         name: Name of the element that is ending.
     """
     if name == u"Unicode":
         self.__isUni = False
         # NOTE(review): "utf-8" is an encoding name rather than a locale
         # identifier -- confirm which locale was intended here.
         loc = Locale.createFromName("utf-8")
         bi = BreakIterator.createWordInstance(loc)
         bi.setText(self.__uniText)
         tokens = []
         prev = 0
         # Iterating the break iterator yields each boundary in turn; this
         # avoids the Python-2-only ``.next()`` call and the manual
         # StopIteration handling.
         for ind in bi:
             tokens.append(self.__uniText[prev:ind])
             prev = ind
         text = u"".join(processToken(t) for t in tokens)
         self.__downstream.characters(text)
     self.__downstream.endElement(name)

예제 #7

파일 보기

파일: page_normalizer.py 프로젝트: impactcentre/pol

	def endElement(self, name):
		"""Handle the end of an element and forward it downstream.

		When ``name`` is ``u"Unicode"``, the accumulated ``self.__uniText`` is
		split at ICU word boundaries, every token is passed through
		``processToken``, and the concatenated result is sent to
		``self.__downstream.characters`` before the end-of-element event is
		forwarded. For every other element only the end event is forwarded.

		Args:
			name: Name of the element that is ending.
		"""
		if name == u"Unicode":
			self.__isUni = False
			# NOTE(review): "utf-8" is an encoding name rather than a locale
			# identifier -- confirm which locale was intended here.
			loc = Locale.createFromName("utf-8")
			bi = BreakIterator.createWordInstance(loc)
			bi.setText(self.__uniText)
			tokens = []
			prev = 0
			# Iterating the break iterator yields each boundary in turn; this
			# avoids the Python-2-only ``.next()`` call and the manual
			# StopIteration handling.
			for ind in bi:
				tokens.append(self.__uniText[prev:ind])
				prev = ind
			text = u"".join(processToken(t) for t in tokens)
			self.__downstream.characters(text)
		self.__downstream.endElement(name)

예제 #8

파일 보기

def main():
    """Print sentence and word boundaries of a fixed sample string.

    Sentence boundaries are shown forward and backward. Word boundaries are
    shown forward, followed by the first element, the last element and the
    element at character position 10. The boundary output itself is
    delegated to ``printEachForward``, ``printEachBackward``, ``printFirst``,
    ``printLast`` and ``printAt``.
    """
    # Every print below is a call with exactly one argument, so the output is
    # the same whether ``print`` is the Python 3 function or the Python 2
    # statement (the original used statement syntax, which Python 3 rejects).
    print("ICU Break Iterator Sample Program")
    print("C++ Break Iteration in Python")

    stringToExamine = u"Aaa bbb ccc. Ddd eee fff."
    # Joined with a single space to reproduce the separator that a
    # two-argument print inserts between its arguments.
    print(" ".join(("Examining: ", stringToExamine)))

    # print each sentence in forward and reverse order
    boundary = BreakIterator.createSentenceInstance(Locale.getUS())
    boundary.setText(stringToExamine)

    print("")
    print("Sentence Boundaries... ")
    print("----- forward: -----------")
    printEachForward(boundary)
    print("----- backward: ----------")
    printEachBackward(boundary)

    # print each word in order
    print("")
    print("Word Boundaries...")
    boundary = BreakIterator.createWordInstance(Locale.getUS())
    boundary.setText(stringToExamine)
    print("----- forward: -----------")
    printEachForward(boundary)
    # print first element
    print("----- first: -------------")
    printFirst(boundary)
    # print last element
    print("----- last: --------------")
    printLast(boundary)
    # print word at charpos 10
    print("----- at pos 10: ---------")
    printAt(boundary, 10)

    print("")
    print("End C++ Break Iteration in Python")

예제 #9

파일 보기

파일: base.py 프로젝트: indatalabs/polyglot

 def __init__(self, locale='en'):
     """Initialise the tokenizer and its word-boundary break iterator.

     Args:
         locale: Forwarded to the parent initialiser; defaults to ``'en'``.
     """
     super(WordTokenizer, self).__init__(locale)
     # self.locale is presumably set by the parent initialiser -- confirm.
     word_breaker = BreakIterator.createWordInstance(self.locale)
     self.breaker = word_breaker

예제 #10

파일 보기

 def __init__(self):
     """Create a word-boundary break iterator for the ``'ar'`` locale.

     The iterator is stored on the instance as ``self.BreakIterator``.
     """
     ar_locale = Locale.createFromName('ar')
     self.BreakIterator = BreakIterator.createWordInstance(ar_locale)

예제 #11

파일 보기

 def __init__(self):
     """Store ``Locale("tr")`` and a word-boundary break iterator for it.

     The locale is kept as ``self.locale`` and the iterator as
     ``self.breakor``.
     """
     self.locale = Locale("tr")
     word_breaker = BreakIterator.createWordInstance(self.locale)
     self.breakor = word_breaker

예제 #12

파일 보기

 def _get_breaker(self, locale):
     """Return a new word-boundary ``BreakIterator`` for ``locale``.

     Args:
         locale: Passed unchanged to ``BreakIterator.createWordInstance``.
     """
     word_breaker = BreakIterator.createWordInstance(locale)
     return word_breaker

예제 #13

파일 보기

파일: unicode_29.py 프로젝트: myaser/DAPOS

 def __init__(self):
     """Build the word-boundary break iterator used by this instance.

     The iterator is created for the locale named ``'ar'`` and stored as
     ``self.BreakIterator``.
     """
     target_locale = Locale.createFromName('ar')
     word_iterator = BreakIterator.createWordInstance(target_locale)
     self.BreakIterator = word_iterator

예제 #14

파일 보기

파일: base.py 프로젝트: whitenick/SoccerPredictions

 def __init__(self, locale='en'):
     """Run the parent initialiser, then create the word break iterator.

     Args:
         locale: Handed to the parent initialiser; defaults to ``'en'``.
     """
     super(WordTokenizer, self).__init__(locale)
     # Reads self.locale, which is presumably populated by the parent
     # initialiser above -- confirm against the base class.
     icu_breaker = BreakIterator.createWordInstance(self.locale)
     self.breaker = icu_breaker