    def _find_sentences(self):
        """Map each sentence produced by split_sentences(self.content) back
        onto the document's token stream, returning Sentence objects that
        span inclusive token index ranges."""
        sentences = []
        token_offset = self.content_idx.start
        for i, sentence in enumerate(split_sentences(self.content)):
            # sentence[2] holds the sentence text; walk the remaining tokens
            # and greedily match them against it, left to right.
            start_token, char_offset = None, 0
            for token in self._document.tokens[token_offset:
                                               self.content_idx.end + 1]:
                if token.text == '\n':
                    continue
                token_pos = sentence[2].find(token.text, char_offset)
                if token_pos >= 0 and start_token is None:
                    # First token of the sentence.
                    start_token = token.index
                elif token_pos >= 0:
                    # Token still belongs to this sentence; remember it as
                    # the current end of the span.
                    token_offset = token.index
                else:
                    # Token no longer matches the sentence text: the
                    # sentence ends here.
                    break
                char_offset = token_pos + len(token.text)

            if start_token is not None and token_offset >= start_token:
                sentences.append(
                    Sentence(self._document, start_token, token_offset))
                sentences[-1].index = i
            # Resume the token scan after the last matched token.
            token_offset += 1

        return sentences
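
# ---------------------------------------------------------------------------
# Illustrative sketch only (not part of the repo): minimal stand-ins for the
# interfaces _find_sentences relies on, inferred from the usage above. The
# real Document, Token, Sentence, and split_sentences are defined elsewhere;
# the names and fields below are assumptions, not the library's API.
#
#     from dataclasses import dataclass
#
#     @dataclass
#     class Token:
#         index: int  # position within Document.tokens
#         text: str   # surface form, matched verbatim against sentence text
#
#     @dataclass
#     class Sentence:
#         document: 'Document'
#         start_token: int  # first token index of the sentence
#         end_token: int    # last token index of the sentence (inclusive)
#         index: int = -1   # set by _find_sentences to the sentence number
#
#     def split_sentences(text):
#         """Returns one tuple per sentence; only the third field (the
#         sentence text, read as sentence[2] above) is relied on here."""
#         ...
# ---------------------------------------------------------------------------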
    def test_sentence_splitter_8(self):
        path = os.path.join(self._folder, 'sentence_splitter_text_8.txt')
        with open(path, 'r', encoding='utf-8') as f:
            content = f.read()
        sentences = split_sentences(content)
        self.assertEqual(len(sentences), 2, 'Not all sentences split')

    def test_sentence_splitter_7(self):
        sentences = split_sentences(
            self.get_document('sentence_splitter_text_7.xml').text)
        self.assertEqual(len(sentences), 15, 'Not all sentences split')
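
    def test_sentence_splitter_inline(self):
        # Illustrative sketch (hypothetical test, not from the original
        # suite): exercises split_sentences on an inline string instead of
        # a fixture file, assuming the tuple shape that _find_sentences
        # reads via sentence[2].
        sentences = split_sentences('First sentence. Second sentence.')
        self.assertEqual(len(sentences), 2, 'Not all sentences split')
        self.assertIn('First', sentences[0][2])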