def _parse(self, course: NavigableString) -> ParseType: """Parses course to get its link & icon url, title, description counts and stores. :course: BeautifulSoup Object""" info = { "link": "", "icon": "", "title": "", "description": "", "counts": {} } info["link"] = course.a["href"] info["icon"] = course.a.img["src"] description: NavigableString = course.a.div info["title"] = description.div.get_text() info["description"] = description.p.get_text() counts: NavigableString = course.find("div", {"class": "courseCounts"}) counts_data: ResultSet = counts.find_all("li") for data in counts_data: name: str = data.span.get_text().lower() val: str = data.find("p").get_text() info["counts"][name] = int(val.replace(",", "")) return info
def _parse_details(self, code: NavigableString) -> ParseType: """Parses a codeContainer and extracts all the info.""" # Format of details -> # {votes: 1184, answers: 24077, post_link: <PostLink> title: <Title>, # tags: [<Tags>, ...], author_name: <AuthorName>, author_link: <Link>, # data_date: <DateTime>, avatar_link: <Link>} details: ParseType = {} post_stats: NavigableString = code.find("div", {"class": "postStats"}) post_stats_children = list(post_stats.children) details["votes"] = post_stats_children[1].p.string # Note the spelling of <a class='postAnsewers'. details["answers"] = post_stats_children[3].p.string post_details: NavigableString = code.find("div", {"class": "postDetails"}) details["post_link"] = post_details.p.a["href"] details["title"] = post_details.p.a.string tags_wrapper: NavigableString = list(post_details.children)[3] tags: ResultSet = tags_wrapper.find_all("span") tag_list: List[str] = [] for tag in tags: tag_list.append(tag.string) details["tags"] = tag_list author_details: NavigableString = code.find("div", {"class": "authorDetails"}) details["author_name"] = author_details.div.a.string details["author_link"] = author_details.div.a["href"] details["data_date"] = author_details.p["data-date"] details["avatar_link"] = list(author_details.children)[3].img["src"] return details
def unwrap_ul(li: element.NavigableString) -> WebToonChapter:
    """Build a ``WebToonChapter`` from one list-item element.

    The chapter url is the ``href`` of the ``<a>`` found inside *li* and
    the display name is the ``alt`` text of the ``<img>`` found inside it;
    both are handed to ``WebToonChapter.from_url``.
    """
    chapter_url = li.find("a")["href"]
    display_name = li.find("img")["alt"]
    return WebToonChapter.from_url(chapter_url, display_name)