def convert_ga(self, doc: html.Element) -> Dict:
    """Extract case, death and test totals from the two summary tables in *doc*.

    The first table must hold exactly two rows labelled "Total" and "Deaths"
    in the "COVID-19 Confirmed Cases" column; the second must hold exactly two
    rows labelled "Commercial Lab" and "GPHL" in the "Lab" column.

    Args:
        doc: parsed page; its tables are located with ``.//table``.

    Returns:
        ``{"positive": int, "tests": int, "deaths": int}`` on success, where
        ``tests`` is the sum of both labs' "Total Tests". When the page does
        not have the expected layout, ``{"error": <message>, "at": <time>}``
        is returned instead.

    Raises:
        ValueError: if a count cell is not numeric or lacks the "(" that
            separates the count from the percentage.
    """

    def layout_error(message: str) -> Dict:
        # Every layout problem is reported the same way: message + timestamp.
        return {"error": message, "at": udatetime.now_as_utc()}

    def leading_count(cell: str) -> int:
        # The "No. Cases (%)" cells carry a count followed by a parenthesised
        # percentage; only the text before "(" is the number.
        return int(cell[0:cell.index("(")])

    tables = doc.findall(".//table")
    if len(tables) == 0:
        return layout_error("no tables -> page layout changed")
    if len(tables) < 2:
        # The second table is indexed below; without this guard a page with a
        # single table raised IndexError instead of reporting a layout change.
        return layout_error("expected two tables -> page layout changed")

    # --- table 1: confirmed cases and deaths
    case_rows = self._htmltable_to_dict(tables[0])["data"]
    if len(case_rows) != 2:
        return layout_error("expected two data rows")
    if case_rows[0]["COVID-19 Confirmed Cases"] != "Total":
        return layout_error("first row should be totals")
    if case_rows[1]["COVID-19 Confirmed Cases"] != "Deaths":
        return layout_error("second row should be deaths")

    positive = leading_count(case_rows[0]["No. Cases (%)"])
    deaths = leading_count(case_rows[1]["No. Cases (%)"])

    # --- table 2: tests per lab
    lab_rows = self._htmltable_to_dict(tables[1])["data"]
    if len(lab_rows) != 2:
        return layout_error("expected two data rows")
    if lab_rows[0]["Lab"] != "Commercial Lab":
        return layout_error("first row should be Commerial Lab")
    if lab_rows[1]["Lab"] != "GPHL":
        return layout_error("second row should be GPHL")

    tests = int(lab_rows[0]["Total Tests"]) + int(lab_rows[1]["Total Tests"])

    return {"positive": positive, "tests": tests, "deaths": deaths}
def _inject_extra_elements(self, tree: html.Element, xurl: str):
    """Insert a ``base`` element carrying *xurl* as the first child of *tree*.

    Nothing is changed when *xurl* is ``None``, when *tree* has no children,
    when its first child is not a ``head`` element, or when *tree* already has
    a direct ``base`` child.
    """
    if xurl is None:
        return

    starts_with_head = len(tree) > 0 and tree[0].tag == "head"
    if not starts_with_head:
        return

    # Only direct children of *tree* are searched for an existing base element.
    if tree.findall("base"):
        return

    # NOTE(review): the URL is stored under the attribute name "ref" and the
    # element is inserted before <head> rather than inside it; HTML's <base>
    # normally uses "href" within <head> -- confirm this is intentional.
    base_element = html.Element("base")
    base_element.attrib["ref"] = xurl
    tree.insert(0, base_element)
def __init__(self, table: Element, headers: Sequence[str] = None) -> None:
    """Set up row iteration state for *table* and determine its column headers.

    Args:
        table: table element; its direct ``tr`` children become ``self.rows``.
        headers: explicit column names. When given and non-empty they are
            used as-is and no row is consumed as a header row.

    Without explicit headers, the cells of the first row inside a ``thead``
    child are used when one exists; otherwise the first ``tr`` is removed from
    ``self.rows`` and its cells become the headers. A table with neither is
    left with an empty header list.
    """
    self.current_row = 0
    self.rows = table.findall("tr")
    self.num_rows = len(self.rows)
    self.headers = []

    if headers:
        self.headers = headers
        return

    thead = table.find("thead")
    if thead is not None:
        first_row = thead[0]
    elif self.rows:
        # first row will be taken as header
        first_row = self.rows[0]
        self.rows = self.rows[1:]
        self.current_row = 0
        self.num_rows -= 1
    else:
        # Empty table: there is no row to read headers from.
        return

    # A cell's text is None when it has no leading text (e.g. it only wraps a
    # child element); treat that as an empty header instead of failing.
    self.headers = [(cell.text or "").strip() for cell in first_row]
def parse_pokemon_from_tab(
        root: Element,
        tab_id: str,
        tab_name: str,
        pokemon_name: str,
        default: Optional[PokemonData] = None) -> Optional[PokemonData]:
    """Build a ``PokemonData`` from one form tab of a scraped species page.

    The tab label is compared with the species name to derive the variant
    (region, gender, mega type or named form) and the display name. The tab's
    ``vitals-table`` tables supply typing, abilities, dex numbers and EV
    yield, the page's ``infocard-list-evo`` blocks supply the evolution line,
    and the ``dex-stats`` section supplies the base stats.

    Args:
        root: parsed page containing every tab.
        tab_id: ``id`` attribute of the element holding this tab's content.
        tab_name: label of the tab (species name plus any form words).
        pokemon_name: species name; it is removed from *tab_name* to isolate
            the words describing the variant.
        default: previously parsed data whose abilities are reused when this
            tab lists none.

    Returns:
        The parsed data, or ``None`` when the tab container, the type links,
        the abilities (with no *default* to fall back on), the stats section
        or the stats table cannot be found.
    """
    # Lookup tables used inside the row loops below.
    ev_stat_by_label = {
        "Attack": Stat.ATTACK,
        "Defense": Stat.DEFENSE,
        "Special Attack": Stat.SP_ATTACK,
        "Special Defense": Stat.SP_DEFENSE,
        "Speed": Stat.SPEED,
        "HP": Stat.HP
    }
    base_stat_field_by_label = {
        "HP": "hp",
        "Attack": "attack",
        "Defense": "defense",
        "Sp. Atk": "special_attack",
        "Sp. Def": "special_defense",
        "Speed": "speed"
    }

    args = {
        "move_list": MoveList(),
        "misc_info": MiscInfo(),
        "dex_entries": DexEntryCollection()
    }

    #
    # Figure out name and variant
    #
    # Removing the species name from the middle of the label leaves a run of
    # spaces behind; collapse all whitespace so the words split cleanly.
    variant_str = " ".join(tab_name.replace(pokemon_name, "").split())
    variant_str_split = variant_str.split(" ")
    first_word = variant_str_split[0]
    remaining_words = " ".join(variant_str_split[1:])

    if Region.is_region_descriptor(first_word):
        # A leading region word becomes a name prefix; the rest of the label
        # is what still has to be classified below.
        region = Region.parse_descriptor(first_word)
        variant_name = PrefixVariantName(first_word, {})
        variant_str = remaining_words.strip()
    else:
        region = Region.NONE
        variant_name = None

    if variant_str == "":
        args["variant"] = Variant(region=region)
    elif variant_str in ["Male", "Female"]:
        args["variant"] = Variant(gender=Gender(variant_str.upper()),
                                  region=region)
        variant_name = _merge_variant_names(
            variant_name, SuffixVariantName(variant_str, {}))
    elif variant_str.startswith("Mega"):
        if variant_str.endswith("X"):
            mega_type = MegaType.X
        elif variant_str.endswith("Y"):
            mega_type = MegaType.Y
        else:
            mega_type = MegaType.NORMAL
        args["variant"] = Variant(mega_type=mega_type, region=region)
        if mega_type == MegaType.NORMAL:
            variant_name = _merge_variant_names(
                variant_name, PrefixVariantName("Mega", {}))
        else:
            # "Mega <name> X" / "Mega <name> Y": the final letter goes after
            # the species name.
            variant_name = _merge_variant_names(
                variant_name,
                CircumfixVariantName("Mega", variant_str[-1], {}, {}))
    elif variant_str == "Primal":
        args["variant"] = Variant(form="Primal", region=region)
        variant_name = _merge_variant_names(
            variant_name, PrefixVariantName("Primal", {}))
    else:
        # Any other label is a named form; a few species need special
        # placement or punctuation of the form words.
        args["variant"] = Variant(form=variant_str, region=region)
        if pokemon_name in ["Kyurem", "Hoopa"]:
            variant_name = _merge_variant_names(
                variant_name,
                SuffixVariantName(variant_str, {}, comma=False))
        elif pokemon_name == "Rotom":
            variant_name = _merge_variant_names(
                variant_name,
                SuffixVariantName(variant_str, {}, spacer="-"))
        elif variant_str in ["Partner", "Own Tempo"] \
                or pokemon_name in ["Necrozma"]:
            variant_name = _merge_variant_names(
                variant_name, PrefixVariantName(variant_str, {}))
        elif variant_str == "Ash-":
            variant_name = _merge_variant_names(
                variant_name, PrefixVariantName("Ash", {}, spacer="-"))
        else:
            variant_name = _merge_variant_names(
                variant_name,
                SuffixVariantName(variant_str, {}, comma=True))

    args["name"] = Name(pokemon_name, {}, variant_name=variant_name)

    #
    # Get typing, abilities, dex entries, and misc_info
    #
    tab_div = root.find(f".//*[@id='{tab_id}']")
    if tab_div is None:
        # No element with this id: nothing to parse for this tab.
        return None
    tables = tab_div.findall(".//*[@class='vitals-table']")
    pokedex_data = tables[0].find("tbody")
    training = tables[1].find("tbody")

    for row in pokedex_data:
        header = row.find("th").text
        #
        if header == "Type":
            types = [
                Type(a.text.upper()) for a in row.find("td").findall("a")
            ]
            if len(types) == 0:
                return None
            args["typing"] = Typing(*types)
        #
        elif header == "Abilities":
            abilities = dict()
            col = row.find("td")
            for span in col.findall("span") + col.findall("small"):
                ability_link = span.find("a")
                if ability_link.tail == " (hidden ability)":
                    abilities["hidden_ability"] = Ability(ability_link.text)
                elif span.text == "1. ":
                    abilities["primary_ability"] = Ability(ability_link.text)
                elif span.text == "2. ":
                    abilities["secondary_ability"] = Ability(
                        ability_link.text)
            if len(abilities) == 0:
                # A tab without its own abilities inherits them from
                # *default* when one was supplied.
                if default:
                    args["abilities"] = default.abilities
                else:
                    return None
            else:
                args["abilities"] = AbilityList(**abilities)
        #
        elif header == "National №":
            col = row.find("td").find("strong")
            args["dex_entries"].add_entry(
                DexEntry(Dex.NATIONAL, int(col.text)))
        #
        elif header == "Local №":
            col = row.find("td")
            if not col.text:
                continue
            # The cell alternates numbers (cell text / text after each <br>)
            # with child elements naming the dex the number belongs to.
            curr_num = int(col.text)
            for tag in col:
                if tag.tag == "br":
                    curr_num = int(tag.tail) if tag.tail else None
                else:
                    # The first and last characters of the label are dropped
                    # (presumably surrounding parentheses -- TODO confirm).
                    args["dex_entries"].add_entry(
                        DexEntry(Dex(tag.text[1:-1]), curr_num))

    for row in training:
        header = row.find("th").text
        if header == "EV yield":
            col = row.find("td")
            # The cell text may be missing entirely, not just blank.
            if not (col.text or "").strip():
                continue
            # Each comma-separated item is "<amount> <stat label>".
            yields = []
            for stat_info in col.text.split(","):
                value, stat = stat_info.strip().split(" ", maxsplit=1)
                yields.append((ev_stat_by_label[stat], int(value)))
            args["misc_info"].ev_yield = EVYield(*yields)

    #
    # Get evolution line
    #
    if args["name"].name in ["Nincada", "Ninjask", "Shedinja"]:
        # This branching line is hard-coded rather than scraped.
        args["misc_info"].evolution_line = \
            EvolutionLine("NINCADA",
                          (Evolution("NINCADA", "NINJASK",
                                     LevelUpEvolutionType(20)),
                           EvolutionLine("NINJASK")),
                          (Evolution("NINCADA", "SHEDINJA",
                                     UnknownEvolutionType()),
                           EvolutionLine("SHEDINJA")))
    else:
        evo_lines = []
        for evo_line_info in root.findall(
                ".//div[@class='infocard-list-evo']"):
            evo_line = _scrape_evolution_line(evo_line_info)
            if isinstance(evo_line, EvolutionLine):
                evo_lines.append(evo_line)
        evo_lines = list(EvolutionLine.merge(*evo_lines))
        # Keep only the lines that actually contain this pokemon.
        formatted_name = FormatUtils.format_name_as_id(args["name"],
                                                       args["variant"],
                                                       ignore_mega=True)
        evo_lines = [
            evl for evl in evo_lines
            if formatted_name in evl.get_all_pokemon_ids_in_line()
        ]
        if len(evo_lines) == 1:
            args["misc_info"].evolution_line = evo_lines[0]
        elif len(evo_lines) > 1:
            print("MULTIPLE EVO LINES FOUND")

    #
    # Get stats
    #
    stats_div = tab_div.find(".//*[@id='dex-stats']...")
    if stats_div is None:
        # Same outcome as a missing stats table below.
        return None
    stats_table = stats_div.find(".//table")
    if stats_table is None:
        return None
    stats_table = stats_table.find("tbody")
    stats = dict()
    for row in stats_table:
        header = row.find("th").text
        value = int(row.find("td").text)
        stats[base_stat_field_by_label[header]] = value
    args["stats"] = BaseStats(**stats)
    #
    return PokemonData(**args)