def from_content(cls, content, announcement_id=0):
    """Parse the content of an announcement's page from Tibia.com.

    Parameters
    ----------
    content: :class:`str`
        The HTML content of an announcement in Tibia.com
    announcement_id: :class:`int`
        The id of the announcement. Since there is no way to obtain the id from the page,
        the id may be passed here to assign it to the result.

    Returns
    -------
    :class:`ForumAnnouncement`
        The announcement contained in the page or :obj:`None` if not found.

    Raises
    ------
    InvalidContent
        If content is not the HTML content of an announcement page in Tibia.com
    """
    document = parse_tibiacom_content(content)

    # Only full-width tables that are direct children of a "BoxContent" element matter.
    root_tables = []
    for candidate in document.find_all("table", attrs={"width": "100%"}):
        if "BoxContent" in candidate.parent.attrs.get("class", []):
            root_tables.append(candidate)

    if not root_tables:
        error_table = document.find("table", attrs={"class": "Table1"})
        if error_table and "not be found" in error_table.text:
            # The page is a valid "announcement not found" page.
            return None
        raise errors.InvalidContent("content is not a Tibia.com forum announcement.")

    forum_info_table, posts_table, footer_table = root_tables

    # The first two links of the header point to the section and the board.
    section_link, board_link, *_ = forum_info_table.find_all("a")
    announcement = cls(
        section=section_link.text,
        section_id=int(section_id_regex.search(section_link["href"]).group(1)),
        board=board_link.text,
        board_id=int(board_id_regex.search(board_link["href"]).group(1)),
        announcement_id=announcement_id,
    )

    # The footer states the timezone used by the dates shown in the page.
    timezone = timezone_regex.search(footer_table.text).group(1)
    if timezone == "CES":
        offset = 1
    else:
        offset = 2

    post_cell = posts_table.find("td", attrs={"class": "CipPost"})
    author_block = post_cell.find("div", attrs={"class": "PostCharacterText"})
    announcement.author = ForumAuthor._parse_author_table(author_block)

    post_body = posts_table.find("div", attrs={"class": "PostText"})
    announcement.title = post_body.find("b").text
    date_strings = post_dates_regex.findall(post_body.find("font").text)

    # Everything after the first horizontal rule is the announcement's body.
    raw_html = post_body.encode_contents().decode()
    _, body_html = raw_html.split("<hr/>", 1)
    announcement.content = body_html

    start_date, end_date = (parse_tibia_forum_datetime(value, offset) for value in date_strings)
    announcement.start_date = start_date
    announcement.end_date = end_date
    return announcement
def from_content(cls, content):
    """Parse the content of the leaderboards page.

    Parameters
    ----------
    content: :class:`str`
        The HTML content of the leaderboards page.

    Returns
    -------
    :class:`Leaderboard`
        The leaderboard if found.

    Raises
    ------
    InvalidContent
        If an :class:`AttributeError` or :class:`ValueError` occurs while parsing.
    """
    try:
        document = parse_tibiacom_content(content)
        content_tables = document.find_all("table", {"class": "TableContent"})
        form_data = parse_form_data(document.find("form"), include_options=True)
        world = form_data["world"]

        selected_rotation = None
        available_rotations = []
        for option_label, option_value in form_data["__options__"]["rotation"].items():
            is_current = "Current" in option_label
            if is_current:
                # Keep only the part of the label matched by the end-date pattern.
                option_label = "".join(rotation_end_pattern.findall(option_label))
            end_date = parse_tibia_datetime(option_label)
            rotation = LeaderboardRotation(int(option_value), end_date, is_current)
            if option_value == form_data["rotation"]:
                selected_rotation = rotation
            available_rotations.append(rotation)

        leaderboard = cls(world, selected_rotation)
        leaderboard.available_worlds = [
            name for name in form_data["__options__"]["world"].values() if name
        ]
        leaderboard.available_rotations = available_rotations

        if leaderboard.rotation and leaderboard.rotation.current:
            # The third content table is read for the "last update" text; the first
            # number found in it is taken as a number of minutes.
            found_numbers = re.findall(r'(\d+)', content_tables[2].text)
            if found_numbers:
                leaderboard.last_update = datetime.timedelta(minutes=int(found_numbers[0]))

        leaderboard._parse_entries(content_tables[-1])

        pagination_block = document.find("small")
        if pagination_block:
            page, total_pages, results_count = parse_pagination(pagination_block)
        else:
            page, total_pages, results_count = 0, 0, 0
        leaderboard.page = page
        leaderboard.total_pages = total_pages
        leaderboard.results_count = results_count
        return leaderboard
    except (AttributeError, ValueError) as e:
        raise errors.InvalidContent("content does not belong to the leaderboards", e)
def from_content(cls, content):
    """Parse the content of a spells page.

    Parameters
    ----------
    content: :class:`str`
        The HTML content of the page.

    Returns
    -------
    :class:`Spell`
        The spell data. If the spell doesn't exist, this will be :obj:`None`.

    Raises
    ------
    InvalidContent
        If content is not the HTML of the spells section.
    """
    parsed_content = parse_tibiacom_content(content)
    try:
        tables = parse_tibiacom_tables(parsed_content)
        title_table = parsed_content.find("table", attrs={"class": False})
        spell_table = tables["Spell Information"]

        # The spell identifier is the file name (without extension) of the spell's image.
        img = title_table.find("img")
        url = urllib.parse.urlparse(img["src"])
        filename = os.path.basename(url.path)
        identifier = str(filename.split(".")[0])

        # The description is made of the nodes between the title table and the next table.
        description_parts = []
        node = title_table.next_sibling
        while node:
            if isinstance(node, bs4.Tag):
                if node.name == "table":
                    break
                description_parts.append("\n" if node.name == "br" else node.text)
            elif isinstance(node, bs4.NavigableString):
                description_parts.append(str(node))
            node = node.next_sibling

        spell = cls._parse_spells_table(identifier, spell_table)
        spell.description = "".join(description_parts).strip()
        if "Rune Information" in tables:
            spell.rune = cls._parse_rune_table(tables["Rune Information"])
        return spell
    except (TypeError, AttributeError, IndexError, KeyError) as e:
        # If the page still contains the spells form, it is the spells section
        # without a spell in it, meaning the requested spell doesn't exist.
        form = parsed_content.find("form")
        if form:
            data = parse_form_data(form)
            # A missing or empty action must not raise a TypeError from inside this
            # handler, as that would hide the InvalidContent error raised below.
            if "subtopic=spells" in (data.get("__action__") or ""):
                return None
        raise errors.InvalidContent("content is not a spell page", e)
def list_from_content(cls, content):
    """Parse the content of a board list in Tibia.com into a list of boards.

    Parameters
    ----------
    content: :class:`str`
        The raw HTML response from the board list.

    Returns
    -------
    :class:`list` of :class:`ListedBoard`
        The boards found in the page.

    Raises
    ------
    InvalidContent
        Content is not a board list in Tibia.com
    """
    try:
        document = parse_tibiacom_content(content)
        # Exactly three full-width tables are expected; the first one is not used.
        _, board_list_table, timezone_table = document.find_all("table", attrs={"width": "100%"})
        # The first row of the board table is skipped.
        _, *board_rows = board_list_table.find_all("tr")

        timezone = timezone_regex.search(timezone_table.text).group(1)
        if timezone == "CES":
            offset = 1
        else:
            offset = 2

        boards = []
        # The last three rows are never parsed as boards.
        for board_row in board_rows[:-3]:
            try:
                parsed_board = cls._parse_board_row(board_row, offset)
            except IndexError:
                # Rows that do not have the expected layout are ignored.
                continue
            boards.append(parsed_board)
        return boards
    except ValueError as e:
        raise errors.InvalidContent("content does not belong to a forum section.", e)
def from_content(cls, content):
    """Create an instance of the class from the HTML content of the thread's page.

    Parameters
    ----------
    content: :class:`str`
        The HTML content of the page.

    Returns
    -------
    :class:`ForumThread`
        The thread contained in the page, or None if the thread doesn't exist

    Raises
    ------
    InvalidContent
        If content is not the HTML of a thread's page.
    """
    parsed_content = parse_tibiacom_content(content)
    tables = parsed_content.find_all("table")
    root_tables = [t for t in tables if "BoxContent" in t.parent.attrs.get("class", [])]
    if not root_tables:
        error_table = parsed_content.find("table", attrs={"class": "Table1"})
        if error_table and "not found" in error_table.text:
            return None
        raise errors.InvalidContent("content is not a Tibia.com forum thread.")

    try:
        if len(root_tables) == 4:
            forum_info_table, title_table, posts_table, footer_table = root_tables
        else:
            # Without a posts table, only three root tables are expected.
            forum_info_table, title_table, footer_table = root_tables
            posts_table = None
    except ValueError as e:
        raise errors.InvalidContent("content is not a Tibia.com forum thread.", e)

    header_text = forum_info_table.text
    section, board, *_ = split_list(header_text, "|", "|")

    thread = cls(section=section, board=board)
    thread.title = title_table.text.strip()
    golden_frame = title_table.find("div", attrs={"class": "CipPost"})
    thread.golden_frame = golden_frame is not None

    timezone = timezone_regex.search(footer_table.text).group(1)

    # ``attrs`` must be a mapping of attribute name to value. The previous set literal
    # ({"class", "ff_white"}) only worked through BeautifulSoup's fallback that treats
    # a non-dict ``attrs`` as a CSS class filter.
    time_page_column, navigation_column = footer_table.find_all("td", attrs={"class": "ff_white"})

    page_links = time_page_column.find_all("a")
    if page_links:
        last_link = page_links[-1]["href"]
        thread.page = int(footer_table.find("span").text)
        thread.total_pages = max(int(page_number_regex.search(last_link).group(1)), thread.page)

    navigation_links = navigation_column.find_all("a")
    if len(navigation_links) == 2:
        prev_link, next_link = navigation_links
        thread.previous_topic_number = int(thread_id_regex.search(prev_link["href"]).group(1))
        thread.next_topic_number = int(thread_id_regex.search(next_link["href"]).group(1))
    elif navigation_links:
        # A single link is either the previous or the next topic.
        only_link = navigation_links[0]
        topic_number = int(thread_id_regex.search(only_link["href"]).group(1))
        if "Previous" in only_link.text:
            thread.previous_topic_number = topic_number
        else:
            thread.next_topic_number = topic_number
    # With no navigation links at all there is nothing to set; this used to
    # raise an IndexError.

    offset = 1 if timezone == "CES" else 2

    if posts_table is not None:
        # The first "ForumPost" container holds the thread's information, the rest are posts.
        thread_info_table, *post_tables = posts_table.find_all("div", attrs={"class": "ForumPost"})
        inner_info_table = thread_info_table.find("table")
        # Exactly three cells are expected; only the first one is used.
        thread_num_col, _pages_col, _navigation_col = inner_info_table.find_all("td")
        thread.thread_id = int(thread_num_col.text.replace("Thread #", ""))
        for post_table in post_tables:
            thread.posts.append(cls._parse_post_table(post_table, offset))
    return thread
def from_content(cls, content):
    """Parse the board's HTML content from Tibia.com.

    Parameters
    ----------
    content: :class:`str`
        The HTML content of the board.

    Returns
    -------
    :class:`ForumBoard`
        The forum board contained.

    Raises
    ------
    InvalidContent
        Content is not a board in Tibia.com
    """
    document = parse_tibiacom_content(content)
    all_tables = document.find_all("table")
    try:
        # At least five tables are required; the fourth one is not used here.
        header_table, time_selector_table, threads_table, timezone_table, boardjump_table, *_ = all_tables
    except ValueError as e:
        raise errors.InvalidContent("content is not a forum board", e)

    section, name = split_list(header_table.text.strip(), "|", "|")
    board = cls(name=name, section=section)

    age_selector = time_selector_table.find("select")
    if not age_selector:
        # Without the age selector, only the section and name are returned.
        return cls(section=section, name=name)

    selected_age = age_selector.find("option", {"selected": True})
    if selected_age:
        board.age = int(selected_age["value"])

    # The selected option of the board jump selector carries the board's id.
    selected_board = boardjump_table.find("select").find("option", {"selected": True})
    board.board_id = int(selected_board["value"])

    page_info = threads_table.find("td", attrs={"class": "ff_info"})
    if page_info:
        current_page_text = page_info.find("span")
        page_links = page_info.find_all("a")
        if current_page_text:
            board.page = int(current_page_text.text)
            last_linked_page = int(page_number_regex.search(page_links[-1]["href"]).group(1))
            board.total_pages = max(board.page, last_linked_page)

    # The first row is skipped, and only rows with exactly seven cells are parsed.
    for thread_row in threads_table.find_all("tr")[1:]:
        cells = thread_row.find_all("td")
        if len(cells) != 7:
            continue
        entry = cls._parse_thread_row(cells)
        if isinstance(entry, ListedThread):
            board.threads.append(entry)
            if thread_row.find("div", attrs={"class": "CipBorder"}):
                entry.golden_frame = True
        elif isinstance(entry, ListedAnnouncement):
            board.announcements.append(entry)
    return board
def from_content(cls, content):
    """Parse the content of the CM Post Archive page from Tibia.com.

    Parameters
    ----------
    content: :class:`str`
        The HTML content of the CM Post Archive in Tibia.com

    Returns
    -------
    :class:`CMPostArchive`
        The CM Post archive found in the page.

    Raises
    ------
    InvalidContent
        If content is not the HTML content of the CM Post Archive in Tibia.com
    """
    parsed_content = parse_tibiacom_content(content)
    form = parsed_content.find("form")
    try:
        # The form must contain exactly six selectors: month, day and year for each date.
        start_month_selector, start_day_selector, start_year_selector, \
            end_month_selector, end_day_selector, end_year_selector = form.find_all("select")
        start_date = cls._get_selected_date(start_month_selector, start_day_selector, start_year_selector)
        end_date = cls._get_selected_date(end_month_selector, end_day_selector, end_year_selector)
    except (AttributeError, ValueError) as e:
        raise errors.InvalidContent("content does not belong to the CM Post Archive in Tibia.com", e)
    cm_archive = cls(start_date=start_date, end_date=end_date)

    # ``attrs`` must be a mapping of attribute name to value. The previous set literals
    # ({"class", "Table3"}, ...) only worked through BeautifulSoup's fallback that
    # treats a non-dict ``attrs`` as a CSS class filter.
    table = parsed_content.find("table", attrs={"class": "Table3"})
    if not table:
        return cm_archive
    inner_table_container = table.find("div", attrs={"class": "InnerTableContainer"})
    inner_table = inner_table_container.find("table")
    # Keep only the rows that belong directly to the inner table. Identity is what is
    # meant here; ``==`` between tags compares their whole structure.
    inner_table_rows = [row for row in inner_table.find_all("tr") if row.parent is inner_table]
    table_content = inner_table_container.find("table", attrs={"class": "TableContent"})

    _header_row, *rows = table_content.find_all("tr")
    for row in rows:
        columns = row.find_all("td")
        date_column = columns[0]
        date = parse_tibia_datetime(date_column.text.replace("\xa0", " "))
        # The second column holds the board and the thread title on separate lines.
        board_thread_column = columns[1]
        convert_line_breaks(board_thread_column)
        board, thread = board_thread_column.text.splitlines()
        link_column = columns[2]
        post_link = link_column.find("a")
        post_link_url = post_link["href"]
        post_id = int(post_id_regex.search(post_link_url).group(1))
        cm_archive.posts.append(CMPost(date=date, board=board, thread_title=thread, post_id=post_id))
    if not cm_archive.posts:
        return cm_archive

    # The last row of the inner table holds the pagination and the results count.
    pages_column, results_column = inner_table_rows[-1].find_all("div")
    page_links = pages_column.find_all("a")
    listed_pages = [int(p.text) for p in page_links]
    if listed_pages:
        linked_pages = set(listed_pages)
        # The current page is taken to be the first number missing among the links.
        cm_archive.page = next((x for x in range(1, listed_pages[-1] + 1) if x not in linked_pages), 0)
        cm_archive.total_pages = max(int(page_links[-1].text), cm_archive.page)
        if not cm_archive.page:
            # No number is missing, so the current page is the one after the last link.
            cm_archive.total_pages += 1
            cm_archive.page = cm_archive.total_pages
    cm_archive.results_count = int(results_column.text.split(":")[-1])
    return cm_archive
def from_content(cls, content):
    """Parse the content of the spells section.

    Parameters
    ----------
    content: :class:`str`
        The HTML content of the page.

    Returns
    -------
    :class:`SpellsSection`
        The spells contained and the filtering information.

    Raises
    ------
    InvalidContent
        If content is not the HTML of the spells section.
    """
    try:
        parsed_content = parse_tibiacom_content(content)
        table_content_container = parsed_content.find("div", attrs={"class": "InnerTableContainer"})
        # The spells table is the one whose class is not "TableContent".
        spells_table = table_content_container.find("table", class_=lambda t: t != "TableContent")
        spell_rows = spells_table.find_all("tr", {'bgcolor': ["#D4C0A1", "#F1E0C6"]})
        spells_section = cls()
        for row in spell_rows:
            columns = row.find_all("td")
            if len(columns) != 7:
                continue
            # The identifier is taken from the "spell" query parameter of the spell's link.
            spell_link = columns[0].find("a")
            url = urllib.parse.urlparse(spell_link["href"])
            query = urllib.parse.parse_qs(url.query)
            cols_text = [c.text for c in columns]
            identifier = query["spell"][0]
            match = spell_name.findall(cols_text[0])
            name, words = match[0]
            group = try_enum(SpellGroup, cols_text[1])
            spell_type = try_enum(SpellType, cols_text[2])
            level = int(cols_text[3])
            mana = parse_integer(cols_text[4], None)
            price = parse_integer(cols_text[5], 0)
            premium = "yes" in cols_text[6]
            spell = SpellEntry(name=name.strip(), words=words.strip(), spell_type=spell_type, level=level,
                               group=group, mana=mana, premium=premium, price=price, identifier=identifier)
            spells_section.entries.append(spell)

        # The form holds the filters that were used to obtain this listing.
        form = parsed_content.find("form")
        data = parse_form_data(form)
        spells_section.vocation = try_enum(VocationSpellFilter, data["vocation"])
        spells_section.group = try_enum(SpellGroup, data["group"])
        spells_section.spell_type = try_enum(SpellType, data["type"])
        spells_section.sort_by = try_enum(SpellSorting, data["sort"])
        # ``premium`` used to be assigned the spell group first (a copy-paste slip) and
        # then overwritten here; only the meaningful assignment is kept.
        spells_section.premium = ("yes" in data["premium"]) if data["premium"] else None
        return spells_section
    except (AttributeError, TypeError) as e:
        raise errors.InvalidContent("content does not belong to the Spells section", e)