def main():
    """Run the ICU break-iterator demo on a fixed two-sentence sample string.

    Prints the sentence boundaries (forward, then backward) and the word
    boundaries (forward, first, last, and the one at character position 10)
    of the sample text to standard output.

    The actual boundary printing is delegated to ``printEachForward``,
    ``printEachBackward``, ``printFirst``, ``printLast`` and ``printAt``,
    which are expected to be in scope alongside this function, as are
    PyICU's ``BreakIterator`` and ``Locale``.

    The Python 2 ``print`` statements were converted to ``print()`` calls so
    the function parses on Python 3; the emitted text is unchanged.
    """
    print("ICU Break Iterator Sample Program")
    print("C++ Break Iteration in Python")

    stringToExamine = u"Aaa bbb ccc. Ddd eee fff."
    # The literal's trailing space plus print's separator gives the same
    # double space the original Python 2 statement produced.
    print("Examining: ", stringToExamine)

    # Both iterators use the same US locale, so look it up once.
    us_locale = Locale.getUS()

    # Print each sentence in forward and reverse order.
    boundary = BreakIterator.createSentenceInstance(us_locale)
    boundary.setText(stringToExamine)

    print()
    print("Sentence Boundaries... ")
    print("----- forward: -----------")
    printEachForward(boundary)
    print("----- backward: ----------")
    printEachBackward(boundary)

    # Print each word in order.
    print()
    print("Word Boundaries...")
    boundary = BreakIterator.createWordInstance(us_locale)
    boundary.setText(stringToExamine)
    print("----- forward: -----------")
    printEachForward(boundary)

    # Print the first element.
    print("----- first: -------------")
    printFirst(boundary)

    # Print the last element.
    print("----- last: --------------")
    printLast(boundary)

    # Print the word at character position 10.
    print("----- at pos 10: ---------")
    printAt(boundary, 10)

    print()
    print("End C++ Break Iteration in Python")
def __init__(self, locale='en'):
    """Set up the tokenizer and its sentence break iterator.

    Args:
        locale: Locale identifier forwarded unchanged to the parent
            initializer. Defaults to ``'en'``.

    After the parent initializer has run, ``self.locale`` (presumably set
    by the parent -- confirm against the base class) is passed to
    ``BreakIterator.createSentenceInstance`` and the result is stored on
    ``self.breaker``.
    """
    # Explicit two-argument super keeps this working on Python 2 and 3.
    parent = super(SentenceTokenizer, self)
    parent.__init__(locale)

    # Read the locale back from the instance rather than from the argument,
    # because the parent initializer may have replaced or normalized it.
    resolved_locale = self.locale
    self.breaker = BreakIterator.createSentenceInstance(resolved_locale)
def __init__(self, lang: str = 'en'):
    """Create a sentence splitter for the given language.

    Args:
        lang: Language/locale identifier handed to ``Locale``.
            Defaults to ``'en'``.

    Stores the identifier on ``self.lang``, the constructed ``Locale`` on
    ``self.locale``, and the object returned by
    ``BreakIterator.createSentenceInstance`` for that locale on
    ``self.break_iterator``.
    """
    self.lang = lang

    # Build the locale once and reuse the same object for both the
    # attribute and the break-iterator factory call.
    icu_locale = Locale(lang)
    self.locale = icu_locale

    self.break_iterator = BreakIterator.createSentenceInstance(icu_locale)
def _get_breaker(self, locale):
    """Return a sentence break iterator for ``locale``.

    Args:
        locale: Locale passed straight through to
            ``BreakIterator.createSentenceInstance``.

    Returns:
        Whatever ``BreakIterator.createSentenceInstance(locale)`` returns;
        a new call is made on every invocation (no caching here).
    """
    sentence_breaker = BreakIterator.createSentenceInstance(locale)
    return sentence_breaker