def get_last_certain_break_index(string, index):
    """Scan backwards from ``index`` for the nearest certain break.

    Starting at ``string[index]``, each codepoint is paired with the one
    before it; the first pair (moving towards the start of the string) for
    which ``get_break_possibility`` reports ``BreakPossibility.CERTAIN``
    determines the result.

    Args:
        string: The text to inspect.
        index: Position to start scanning back from.

    Returns:
        ``len(string)`` when ``index`` is at or past the end, ``0`` when
        ``index`` is zero or negative or when no certain break is found,
        and otherwise the index of the codepoint that immediately follows
        the certain break.
    """
    length = len(string)
    if index >= length:
        return length
    # Checked before touching string[index]: a non-positive index can only
    # ever resolve to 0, and indexing first would raise IndexError for
    # negative values that fall outside the string.
    if index <= 0:
        return 0

    following = get_group(string[index])
    position = index
    while position > 0:
        position -= 1
        current = get_group(string[position])
        if get_break_possibility(current, following) == BreakPossibility.CERTAIN:
            # The break sits between `position` and `position + 1`.
            return position + 1
        following = current
    return 0
def __init__(self, string):
    """Prime the iterator with the first item of ``string``.

    ``str_iter`` keeps the iterator over the remaining items. ``buffer``
    receives the first item, or ``None`` when ``string`` is empty. For
    non-empty input, ``state`` is set to the second element of the pair
    returned by ``FSM.default`` for the first item's group; for empty
    input ``state`` is not assigned at all.
    """
    remaining = iter(string)
    self.str_iter = remaining
    try:
        self.buffer = next(remaining)
    except StopIteration:
        # Nothing to iterate over: leave an empty buffer and stop here.
        self.buffer = None
        return

    # Only the follow-up state matters for the very first item.
    _ignored, initial_state = FSM.default(get_group(self.buffer))
    self.state = initial_state
def __next__(self):
    """Return the next chunk accumulated in ``buffer``.

    Each remaining item is fed (as its group) to the current ``state``
    callable, which yields a break flag and the next state. Items are
    appended to ``buffer`` until a break is flagged, at which point the
    result of ``self._break(item)`` is returned. Once the input is
    exhausted, a non-empty buffer is flushed via ``self._break(None)``.

    Raises:
        StopIteration: When the input is exhausted and the buffer is empty.
    """
    # NOTE(review): _break presumably hands back the current buffer and
    # restarts it with its argument -- confirm against its definition.
    for item in self.str_iter:
        boundary_found, self.state = self.state(get_group(item))
        if not boundary_found:
            self.buffer += item
            continue
        return self._break(item)

    if not self.buffer:
        raise StopIteration()
    return self._break(None)
def test_get_group(self):
    # A plain ASCII letter should be classified as OTHER.
    actual = get_group("s")
    self.assertEqual(actual, GraphemePropertyGroup.OTHER)
def test_get_group_lf(self):
    # U+000A (line feed) should be classified as LF.
    actual = get_group("\u000A")
    self.assertEqual(actual, GraphemePropertyGroup.LF)
def test_get_group_cr(self):
    # U+000D (carriage return) should be classified as CR.
    actual = get_group("\u000D")
    self.assertEqual(actual, GraphemePropertyGroup.CR)
def test_get_group_prepend(self):
    # U+0605 should be classified as PREPEND.
    actual = get_group("\u0605")
    self.assertEqual(actual, GraphemePropertyGroup.PREPEND)