Exemple #1
0
def __find_candidates_to_anaphora_in_chapter(chapter, total_words, candidates,
                                             stop_word_check):
    """Find candidates to anaphora in the specified chapter.

    A candidate is a run of two or more consecutive sentences whose first
    words are equal after lower-casing. The run must start with a sentence
    of more than one word whose first word is not rejected by
    ``stop_word_check``.

    :param chapter: object exposing ``sentences``, a sequence of sentences,
        each a sequence of word strings
    :param total_words: offset added to the in-chapter word position to
        build the indices stored in each candidate
    :param candidates: list that newly found candidates are appended to
        (mutated in place; a candidate already present is not added again)
    :param stop_word_check: callable receiving the lower-cased first word of
        a sentence; a truthy result prevents that sentence from starting a run
    :return: number of words counted in the chapter (a one-word sentence
        whose only word is falsy is not counted)
    """
    sentences = chapter.sentences
    word_count = 0
    i = 0
    while i < len(sentences):
        sentence = sentences[i]

        # Sentences of zero or one word cannot start a run; just count them.
        if len(sentence) <= 1:
            if len(sentence) > 0 and sentence[0]:
                word_count += len(sentence)
            i += 1
            continue

        sentence_start = total_words + word_count
        candidate = Feature('anaphora')
        candidate.add_word(sentence_start)
        candidate.add_context(sentence_start,
                              sentence_start + len(sentence) - 1)
        word_count += len(sentence)
        first_anaphora_word = sentence[0].lower()
        i += 1
        if stop_word_check(first_anaphora_word):
            continue

        # Absorb every following sentence that opens with the same word.
        # The length guard matters: an empty sentence has no first word, so
        # it ends the run instead of raising IndexError; the outer loop then
        # skips over it on the next iteration.
        context_length = 1
        while (i < len(sentences) and len(sentences[i]) > 0
               and sentences[i][0].lower() == first_anaphora_word):
            following = sentences[i]
            following_start = total_words + word_count
            candidate.extend_context(following_start + len(following) - 1)
            candidate.add_word(following_start)
            word_count += len(following)
            i += 1
            context_length += 1

        # A single sentence on its own is not a repetition.
        if context_length > 1 and candidate not in candidates:
            candidates.append(candidate)
    return word_count
Exemple #2
0
def __find_epiphora_inside_chapter(chapter: Chapter, start_count: int,
                                   stop_word_check) -> list:
    """
        Collects epiphora features from a chapter

    Every pair of neighbouring sentences is passed to
    ``__test_sentences_for_epiphora``. The first matching pair opens a new
    "epiphora" Feature; each directly following matching pair adds the last
    word of its second sentence to that Feature and stretches its context.
    A non-matching pair closes the current Feature.

    :param chapter: chapter to find epiphora from (list of sentences as list of words)
    :param start_count: index of first word in chapter
    :param stop_word_check: function checking if word is stop word
    :return: list with epiphora(Feature objects)
    """
    found = []
    open_feature = None
    # Index of the first word of the sentence currently being examined.
    sentence_start = start_count
    pair_count = len(chapter.sentences) - 1
    for idx in range(pair_count):
        nxt = idx + 1
        matches = __test_sentences_for_epiphora(chapter[idx], chapter[nxt],
                                                stop_word_check)
        if not matches:
            open_feature = None
        elif open_feature is None:
            # Last-word indices of the first and second sentence of the pair.
            first_end = sentence_start + len(chapter.sentences[idx]) - 1
            second_end = first_end + len(chapter.sentences[nxt])
            open_feature = Feature("epiphora",
                                   words=[first_end, second_end],
                                   context=[sentence_start, second_end])
            found.append(open_feature)
        else:
            second_end = (sentence_start + len(chapter[idx]) +
                          len(chapter[nxt]) - 1)
            open_feature.add_word(second_end)
            open_feature.extend_context(second_end)
        sentence_start += len(chapter[idx])
    return found