def parse_chapters(soup):
    """Build the chapter list from the ``h1`` elements found in *soup*.

    Each element returned by ``soup.find_all('h1')`` becomes a
    ``data.Chapter`` carrying the element's ``text`` and the index reported
    by ``get_soup_index``. The raw chapters are then passed through
    ``merge_adjacent_chapter_items`` and ``clean_chapter_text``.

    NOTE(review): another ``parse_chapters`` definition appears after this
    one in the source and additionally filters with ``should_be_excluded``.
    If both live in the same module, the later definition replaces this one.

    Args:
        soup: Parsed document exposing ``find_all`` (presumably a
            BeautifulSoup object -- confirm against the caller).

    Returns:
        The chapters returned by ``merge_adjacent_chapter_items``.
    """
    raw_chapters = []
    for heading in soup.find_all('h1'):
        title = heading.text
        position = get_soup_index(soup, heading)
        raw_chapters.append(data.Chapter(text=title, soup_index=position))

    merged = merge_adjacent_chapter_items(raw_chapters)
    # The return value is ignored, so this presumably cleans the chapter
    # text in place -- TODO confirm.
    clean_chapter_text(merged)
    return merged
def parse_chapters(soup):
    """Build the filtered chapter list from the ``h1`` elements in *soup*.

    Each element returned by ``soup.find_all('h1')`` becomes a
    ``data.Chapter`` carrying the element's ``text`` and the index reported
    by ``get_soup_index``. The raw chapters are merged with
    ``merge_adjacent_chapter_items``, passed to ``clean_chapter_text``, and
    finally every chapter for which ``should_be_excluded`` is truthy is
    dropped.

    Args:
        soup: Parsed document exposing ``find_all`` (presumably a
            BeautifulSoup object -- confirm against the caller).

    Returns:
        A new list of the merged chapters that were not excluded.
    """
    raw_chapters = []
    for heading in soup.find_all('h1'):
        title = heading.text
        position = get_soup_index(soup, heading)
        raw_chapters.append(data.Chapter(text=title, soup_index=position))

    merged = merge_adjacent_chapter_items(raw_chapters)
    # The return value is ignored, so this presumably cleans the chapter
    # text in place -- TODO confirm. It runs before the exclusion check, so
    # should_be_excluded sees the chapters after this call.
    clean_chapter_text(merged)

    kept = []
    for chapter in merged:
        if should_be_excluded(chapter):
            continue
        kept.append(chapter)
    return kept
def merge_two(a, b):
    """Merge two chapter items into a single ``data.Chapter``.

    The merged text is chosen as follows:

    * if ``b.text`` is contained in ``a.text`` (including when they are
      equal), ``a.text`` is kept;
    * otherwise, if ``a.text`` is contained in ``b.text``, ``b.text`` is
      kept, so the longer, containing text survives;
    * otherwise the two texts are joined with a single space.

    Args:
        a: First item; must expose ``text`` and ``soup_index``.
        b: Second item; must expose ``text``.

    Returns:
        A new ``data.Chapter`` with the merged text and ``a.soup_index``.
    """
    if b.text in a.text:
        text = a.text
    elif a.text in b.text:
        # Keep the containing text. An empty string is a substring of every
        # string, so returning a.text here would throw away b.text entirely
        # when a.text is empty, and would truncate the title whenever a.text
        # is only a fragment of b.text.
        text = b.text
    else:
        text = a.text + ' ' + b.text
    return data.Chapter(text=text, soup_index=a.soup_index)