def __find_candidates_to_anaphora_in_chapter(chapter, total_words, candidates, stop_word_check):
    """
    Finds candidates to anaphora in the specified chapter.

    A candidate is a run of two or more consecutive sentences that begin with
    the same word (compared after lower-casing), where that word is not
    rejected by ``stop_word_check``. Only a sentence with more than one word
    can open a run; the following sentences only need to be non-empty and to
    start with the same word.

    :param chapter: object whose ``sentences`` attribute is a sequence of
        sentences, each one a sequence of word strings
    :param total_words: offset added to every word index produced here
        (presumably the number of words before this chapter; verify against caller)
    :param candidates: list that found candidates are appended to in place;
        a candidate for which ``candidate in candidates`` is already true is skipped
    :param stop_word_check: callable receiving the lower-cased first word of a
        sentence; a truthy result means no run is started from that sentence
    :return: number of words counted in the chapter (empty sentences and
        one-word sentences whose only word is falsy are not counted)
    """
    sentences = chapter.sentences
    word_count = 0
    i = 0
    while i < len(sentences):
        sentence = sentences[i]

        if len(sentence) <= 1:
            # Too short to open a run: only account for its word, if it has a
            # non-empty one, and move on.
            if len(sentence) > 0 and sentence[0]:
                word_count += len(sentence)
            i += 1
            continue

        # Tentatively open a candidate on this sentence: its first word is the
        # repeated word, the whole sentence is the initial context.
        first_index = total_words + word_count
        candidate = Feature('anaphora')
        candidate.add_word(first_index)
        candidate.add_context(first_index, first_index + len(sentence) - 1)
        word_count += len(sentence)
        first_anaphora_word = sentence[0].lower()
        i += 1

        if stop_word_check(first_anaphora_word):
            # Repeated stop words are not treated as anaphora; drop the candidate.
            continue

        context_length = 1
        # Absorb every following sentence that starts with the same word.
        # The emptiness guard matters: an empty sentence has no first word, so
        # it ends the run here and is then skipped by the outer loop instead of
        # raising IndexError.
        while (i < len(sentences)
               and len(sentences[i]) > 0
               and sentences[i][0].lower() == first_anaphora_word):
            candidate.extend_context(total_words + word_count + len(sentences[i]) - 1)
            candidate.add_word(total_words + word_count)
            word_count += len(sentences[i])
            i += 1
            context_length += 1

        # A single sentence is not a repetition; keep only real runs.
        if context_length > 1 and candidate not in candidates:
            candidates.append(candidate)

    return word_count
def __find_epiphora_inside_chapter(chapter: Chapter, start_count: int, stop_word_check) -> list:
    """
    Collects epiphora features from neighbouring sentences of a chapter.

    Every pair of adjacent sentences is passed to
    ``__test_sentences_for_epiphora``. The first matching pair of a streak
    creates a new feature; each further consecutive matching pair extends that
    same feature. A non-matching pair closes the streak.

    :param chapter: chapter to find epiphora from (list of sentences as list of
        words); read both through ``chapter.sentences`` and through ``chapter[i]``
    :param start_count: index of first word in chapter
    :param stop_word_check: function checking if word is stop word, forwarded
        unchanged to the pair test
    :return: list with epiphora (Feature objects) in the order they were opened
    """
    found = []
    open_feature = None
    # Index of the first word of the sentence currently being examined.
    offset = start_count
    pair_total = len(chapter.sentences) - 1

    for idx in range(pair_total):
        is_match = __test_sentences_for_epiphora(chapter[idx], chapter[idx + 1], stop_word_check)

        if not is_match:
            # Streak broken: the next matching pair starts a fresh feature.
            open_feature = None
        elif open_feature is None:
            current_len = len(chapter.sentences[idx])
            following_len = len(chapter.sentences[idx + 1])
            # Last word of each of the two sentences in the pair.
            last_of_current = offset + current_len - 1
            last_of_following = offset + current_len + following_len - 1
            open_feature = Feature(
                "epiphora",
                words=[last_of_current, last_of_following],
                context=[offset, last_of_following])
            found.append(open_feature)
        else:
            # Streak continues: add the last word of the following sentence and
            # stretch the context up to it.
            last_of_following = offset + len(chapter[idx]) + len(chapter[idx + 1]) - 1
            open_feature.add_word(last_of_following)
            open_feature.extend_context(last_of_following)

        offset += len(chapter[idx])

    return found