Esempio n. 1
0
def parse_chapters(soup):
    """Collect chapters from every ``h1`` element found in *soup*.

    Each heading is turned into a ``data.Chapter`` holding the heading's
    text and the value returned by ``get_soup_index`` for it. The raw list
    is passed through ``merge_adjacent_chapter_items`` and the merged
    result is handed to ``clean_chapter_text`` before being returned.
    """
    raw_chapters = []
    for heading in soup.find_all('h1'):
        title = heading.text
        position = get_soup_index(soup, heading)
        raw_chapters.append(data.Chapter(text=title, soup_index=position))

    merged = merge_adjacent_chapter_items(raw_chapters)
    # Return value is ignored; presumably this cleans the chapters in
    # place -- confirm against clean_chapter_text.
    clean_chapter_text(merged)
    return merged
Esempio n. 2
0
def parse_chapters(soup):
    """Return the chapters found in *soup*, minus the excluded ones.

    Every ``h1`` element becomes a ``data.Chapter`` (heading text plus the
    value from ``get_soup_index``). The raw chapters are merged with
    ``merge_adjacent_chapter_items``, passed to ``clean_chapter_text``, and
    finally every chapter for which ``should_be_excluded`` is truthy is
    dropped.
    """

    def to_chapter(heading):
        # Wrap a single heading element in a Chapter record.
        return data.Chapter(
            text=heading.text,
            soup_index=get_soup_index(soup, heading),
        )

    headings = soup.find_all('h1')
    merged = merge_adjacent_chapter_items([to_chapter(h) for h in headings])
    # Return value is ignored; presumably this cleans the chapters in
    # place -- confirm against clean_chapter_text.
    clean_chapter_text(merged)

    kept = []
    for chapter in merged:
        if should_be_excluded(chapter):
            continue
        kept.append(chapter)
    return kept
Esempio n. 3
0
def merge_two(a, b):
    """Merge two chapter items into a single ``data.Chapter``.

    When one item's text contains the other's, the containing text is kept
    so no heading content is lost; otherwise the two texts are joined with
    a single space. The merged chapter always takes its ``soup_index``
    from ``a``.

    Args:
        a: First item; must expose ``text`` and ``soup_index``.
        b: Second item; must expose ``text``.

    Returns:
        A new ``data.Chapter`` with the merged text and ``a.soup_index``.
    """
    if a.text in b.text:
        # b already carries everything a has. This also covers an empty
        # a.text ('' is a substring of every string), which previously
        # made the merge discard b's text entirely.
        text = b.text
    elif b.text in a.text:
        text = a.text
    else:
        text = a.text + ' ' + b.text
    return data.Chapter(text=text, soup_index=a.soup_index)