def parse_activity(
    activity: etree._Element,
) -> Tuple[List[backend.events.AcademicalEvent], str, str, str]:
    """
    Parses an element from a request into a list of events and some activity
    information.

    :param activity: the activity element
    :type activity: etree._Element
    :return: the events, the name, the id and the code of this activity
    :rtype: Tuple[List[backend.events.AcademicalEvent], str, str, str]
    """
    attribs = activity.attrib
    activity_id = attribs["name"]
    activity_name = attribs["code"]
    event_type = backend.events.extract_type(attribs["type"], activity_id)

    event_codes = activity.xpath(
        './/eventParticipant[@category="category5"]/@name')

    # Derive the activity code: prefer the most frequent participant code,
    # otherwise fall back to extracting it from the activity id.
    if event_codes:
        activity_code = Counter(event_codes).most_common()[0][0]
    else:
        activity_code = backend.events.extract_code(activity_id)
    if activity_code == "":
        activity_code = "Other"

    events_list = [
        parse_event(event, event_type, activity_name, activity_id,
                    activity_code)
        for event in activity.xpath(".//event")
    ]

    return events_list, activity_name, activity_id, activity_code
def _parse_search_item(self, html: _Element,
                       metadata: dict) -> Optional[PeopleItem]:
    """
    Parse one search-result element into a ``PeopleItem``.

    :param html: the result element (list-item based layout)
    :param metadata: request metadata; only ``'keyword'`` is read
    :return: the populated item, or None when the publish date cannot
        be extracted
    """
    element = html.xpath('li/b/a')[0]
    title = element.text
    url = element.attrib['href']
    abstract = html.xpath('li[2]')[0].text
    try:
        element = html.xpath('li[3]/a')[0]
        publish_str = element.tail.strip()
        publish = datetime.datetime.strptime(publish_str,
                                             '%Y-%m-%d %H:%M:%S')
    except (IndexError, AttributeError, ValueError):
        # Narrowed from a bare except: missing node (IndexError),
        # missing tail text (AttributeError) or a malformed date
        # (ValueError) all make the item unusable.
        return None

    item = PeopleItem()
    item.title = title
    item.url = url
    item.keyword = metadata.get('keyword', '')
    item.abstract = abstract
    item.publish = publish
    return item
def _parse_search_item(self, html: _Element,
                       metadata: dict) -> Optional[CnrItem]:
    """
    Parse one search-result element into a ``CnrItem``.

    :param html: the result element (div based layout)
    :param metadata: request metadata; only ``'keyword'`` is read
    :return: the populated item, or None when the publish date cannot
        be extracted
    """
    element = html.xpath('div[1]/a')[0]
    title = utility.get_element_str(element)
    url = element.attrib['href']
    element = html.xpath('div[2]')
    abstract = ''
    if element:
        abstract = utility.get_element_str(element[0])
    try:
        element = html.xpath('div/span[@class="searchresulturl"]')[0]
        publish_str = element.tail.strip()
        publish = datetime.datetime.strptime(publish_str,
                                             '%Y.%m.%d %H:%M:%S')
    except (IndexError, AttributeError, ValueError):
        # Narrowed from a bare except: missing node (IndexError),
        # missing tail text (AttributeError) or a malformed date
        # (ValueError) all make the item unusable.
        return None

    item = CnrItem()
    item.title = title
    item.url = url
    item.abstract = abstract
    item.keyword = metadata.get('keyword', '')
    item.publish = publish
    return item
def is_supported_element(
    element: etree._Element,
) -> bool:
    """
    Return True when ``element`` is a definition this tool can handle.

    A "ref" attribute is resolved by id against the whole document and the
    check recurses into the target.  For "function" and "record" elements,
    every component must either be one of the built-in type names or
    reference a class that itself passes this check.  Any other tag is
    accepted unconditionally.
    """
    if "ref" in element.attrib:
        ref = element.attrib["ref"]
        # Single-element unpacking: raises if the document contains zero
        # or several elements with this id.
        target, = element.xpath(f'//*[@id="{ref}"]')
        return is_supported_element(target)

    if element.tag in {"function", "record"}:

        def valid_component(
            component: etree._Element,
        ) -> bool:
            # A component is valid when its class is a known primitive ...
            className, _ = ClassNameAndDimensions.from_component(
                component
            )
            if className in {
                TypeName("Real"),
                TypeName("Integer"),
                TypeName("Boolean"),
                TypeName("String"),
                TypeName("VariableName"),
                TypeName("TypeName"),
            }:
                return True
            # ... or when the class it names exists in the document and is
            # itself supported (recursive check).
            class_ = element.xpath(f'//*[@id="{className!s}"]')
            return bool(class_) and is_supported_element(class_[0])

        return all(map(valid_component, element.xpath("./components/*/*")))

    return True
def parse_game_mode(mode: etree._Element):
    """
    Extract the comparison ("general") stats and the per-hero stat tables
    from one game-mode section of an Overwatch career profile page.
    """
    general_stats = {}
    comparison_divs = mode.xpath(
        './section/div/div[@data-category-id and @data-group-id="comparisons"]'
    )
    for category in comparison_divs:
        bars = {}
        for hero_bar in category:
            description = hero_bar.xpath(
                './div/div/div[@class="description"]')[0].text
            percentage = hero_bar.get('data-overwatch-progress-percent')
            bars[id_hero[hero_bar.get('data-hero-guid')]] = (description,
                                                             percentage)
        general_stats[id_stat[category.get('data-category-id')]] = bars

    hero_stats = {}
    hero_divs = mode.xpath(
        './section/div/div[@data-category-id and @data-group-id="stats"]'
    )
    for hero_div in hero_divs:
        groups = {}
        for group in hero_div:
            group_name = group.xpath(
                './div/table/thead/tr/th/h5[@class="stat-title"]')[0].text
            rows = {}
            for row in group.xpath('./div/table/tbody')[0]:
                rows[row[0].text] = row[1].text
            groups[group_name] = rows
        hero_stats[id_hero[hero_div.get('data-category-id')]] = groups

    return {'general_stats': general_stats, 'hero_stats': hero_stats}
def _parse_search_item(self, html: _Element,
                       metadata: dict) -> Optional[ChinaNewsItem]:
    """
    Parse one search-result element into a ``ChinaNewsItem``.

    :param html: the result element
    :param metadata: request metadata; only ``'keyword'`` is read
    :return: the populated item, or None when the publish date cannot
        be extracted
    """
    element = html.xpath('.//li[contains(@class, "news_title")]/a')[0]
    title = utility.get_element_str(element)
    url = element.attrib['href']
    abstract = utility.get_element_str(
        html.xpath('.//li[@class="news_content"]')[0])
    try:
        element = html.xpath('.//li[@class="news_other"]')[0]
        publish_str = element.text.strip()
        # The date is the last tab-separated field of the "other" line.
        publish_str = publish_str.split('\t')[-1]
        publish = datetime.datetime.strptime(publish_str,
                                             '%Y-%m-%d %H:%M:%S')
    except (IndexError, AttributeError, ValueError):
        # Narrowed from a bare except: missing node (IndexError),
        # missing text (AttributeError) or a malformed date (ValueError)
        # all make the item unusable.
        return None

    item = ChinaNewsItem()
    item.title = title
    item.url = url
    item.keyword = metadata.get('keyword', '')
    item.abstract = abstract
    item.publish = publish
    return item
def get_publish_date_range(self, date: etree._Element):
    """
    Return the publication date of a <date> element: a plain text string,
    or, for ranged dates, a dict with a 'start' key and optionally 'to'.
    """
    # NOTE(review): the guard checks for a @from attribute but the value
    # is read from @start -- confirm against the schema which attribute
    # name is correct; one of the two looks like a typo.
    if exists(date, '@from'):
        # BUG FIX: the original rebound ``date`` to the result dict and
        # then called ``exists``/``xpath`` on that dict, which crashed for
        # any ranged date that also carried @to.  Keep the element and the
        # result in separate names.
        result = {'start': date.xpath('@start')[0]}
        if exists(date, '@to'):
            result['to'] = date.xpath('@to')[0]
        return result
    else:
        return date.xpath('text()')[0]
def get_publish_date(self, tei_header: etree._Element):
    """
    Return the publish date of the digitized edition, preferring a ranged
    "summaryDigitizedEd" date over the plain "digitizedEd" date text.
    """
    ranged = tei_header.xpath(
        'tei:fileDesc/tei:editionStmt/tei:edition/tei:date[@type = "summaryDigitizedEd"]',
        namespaces=xml_ns)
    if ranged:
        return self.get_publish_date_range(ranged[0])
    plain = tei_header.xpath(
        'tei:fileDesc/tei:editionStmt/tei:edition/tei:date[@type = "digitizedEd"]/text()',
        namespaces=xml_ns)
    return plain[0]
def get_source_publish_place(self, tei_header: etree._Element):
    """
    Return the place name of this edition's imprint, falling back to the
    first edition's place when no "thisEd" pubPlace exists.
    """
    prefix = ('tei:fileDesc/tei:sourceDesc/tei:biblStruct/tei:monogr/'
              'tei:imprint/tei:pubPlace')
    this_ed = tei_header.xpath(prefix + '[@role = "thisEd"]',
                               namespaces=xml_ns)
    if this_ed:
        return self.get_place_name(this_ed[0])
    first_ed = tei_header.xpath(prefix + '[@role = "firstEd"]',
                                namespaces=xml_ns)
    return self.get_place_name(first_ed[0])
def get(self, line: ET._Element) -> dict:
    """
    Compute layout features for one ALTO TextLine.

    :param line: an alto:TextLine element
    :return: feature dict with horizontal/vertical deltas relative to the
        enclosing TextBlock and neighbouring lines, plus repetitiveness
        flags derived from ``self.patterns`` / ``self.patterns_first``
    :raises KeyError: if ``line`` is not an alto:TextLine element
    """
    if line.tag != f"{ALTO}TextLine":
        raise KeyError
    # Enclosing TextBlock and this line's index within it.
    block = line.xpath(f"./ancestor::alto:TextBlock", namespaces=ALTO_NS)[0]  # TextBlock
    block_lines = block.findall(f".//{ALTO}TextLine")
    line_index = block_lines.index(line)
    line_text = misc.get_text(line).strip()
    # NOTE(review): ``line_words`` is never used below -- candidate for removal.
    line_words = line_text.split(" ")
    line_h = float(line.get("HPOS"))
    line_v = float(line.get("VPOS"))
    line_w = float(line.get("WIDTH"))
    line_height = float(line.get("HEIGHT"))
    block_h = float(block.get("HPOS"))
    block_w = float(block.get("WIDTH"))
    # Bottom edge of the previous line; falls back to our own top edge
    # when this is the first line of the block.
    if line_index > 0:
        previous_line = block_lines[line_index - 1]
        previous_line_v = float(previous_line.get("VPOS")) + float(
            previous_line.get("HEIGHT"))
    else:
        previous_line_v = line_v
    # Top edge of the next line; falls back to our own bottom edge when
    # this is the last line of the block.
    if line_index < len(block_lines) - 1:
        next_line = block_lines[line_index + 1]
        next_line_v = float(next_line.get("VPOS"))
    else:
        next_line_v = line_v + line_height
    f: Dict[str, Any] = {}
    # geometry
    f["#line_position"] = get_status(line, relative_to=f"alto:TextBlock")
    # f["position_h"] = line_h
    # f["position_v"] = line_v
    f["prev_delta_h"] = line_h - block_h
    f["next_delta_h"] = block_h + block_w - (line_h + line_w)
    f["prev_delta_v"] = line_v - previous_line_v
    f["next_delta_v"] = next_line_v - (line_v + line_height)
    f["repetitive"] = False
    f["repetitive_first"] = False
    # Only lines near the block edges (first two / last one) are checked
    # for repeating text patterns such as running headers or footers.
    if line_index < 2 or line_index >= len(block_lines) - 1:
        pattern = misc.get_pattern(line_text)
        # Repetitive when the pattern was seen at least twice overall.
        f["repetitive"] = (pattern in self.patterns) and (
            self.patterns[pattern] >= 2)
        if pattern in self.patterns:
            page = line.xpath(f"./ancestor::alto:Page", namespaces=ALTO_NS)[0]
            # True when this page is where the pattern first occurred.
            f["repetitive_first"] = self.patterns_first[
                pattern] == page.get("PHYSICAL_IMG_NR")
    return f
def make_title(self, node: etree._Element) -> str:
    """
    Build a display title for a character declaration node.

    For a charDecl, use the English teiHeader title; for a char with a
    desc, use the desc text; otherwise return None.
    """
    # BUG FIX: ``exists`` was called without the context node (compare its
    # use elsewhere in this codebase, e.g. ``exists(anc, ...)``), so the
    # self:: predicates were never evaluated against ``node``.
    if exists(node, 'self::tei:charDecl'):
        return str(
            node.xpath(
                'ancestor::tei:teiheader/tei:fileDesc/tei:titleStmt/tei:title[@xml:lang = "en"]',
                namespaces=xml_ns)[0])  # TODO i18n
    elif exists(node, 'self::tei:char/tei:desc'):
        return str(node.xpath('tei:desc/text()', namespaces=xml_ns)[0])
    else:
        # NOTE: annotation says str, but no-match yields None.
        return None
def get_ms_msdiv(cls, paragraph: _Element) -> Tuple[MsId, PaliMsDivId]:
    """
    Extract the manuscript id and the (optional) ms-division id from a
    paragraph element.

    :param paragraph: element containing an <a class='ms'> anchor and
        optionally an <a class='msdiv'> anchor
    :return: the (ms_id, msdiv_id) pair; msdiv_id is empty when no msdiv
        anchor is present
    """
    a_ms = paragraph.xpath("./a[@class='ms']")[0]
    ms_id = MsId.from_xml_id(a_ms.get("id", ""))
    msdiv_id = PaliMsDivId("")
    try:
        a_msdiv = paragraph.xpath("./a[@class='msdiv']")[0]
        msdiv_id = PaliMsDivId(a_msdiv.get("id", "").strip())
    except IndexError:
        # Missing msdiv anchor is expected; fixed typo in the log
        # message ("if" -> "id").
        log.trace("No msdiv id for ms: '%s'", ms_id)
    return ms_id, msdiv_id
def replace_img_links(self, item: etree._Element, replace_with: str):
    """
    Rewrite image URLs of a feed item through the ``replace_with``
    %%-format template (e.g. a proxy URL pattern containing ``%s``).
    """
    for enclosure in cast(List[etree._Element], item.xpath(".//enclosure")):
        # <enclosure> elements: the raw URL is substituted directly.
        enclosure.attrib["url"] = replace_with % enclosure.attrib["url"]
    # media:content tag
    for media in cast(List[etree._Element], item.xpath(".//*[local-name()='content'][@url]")):
        # NOTE(review): here the URL is percent-encoded via quote_plus,
        # while the enclosure branch above substitutes the raw URL --
        # confirm this asymmetry is intended.
        media.attrib["url"] = replace_with % quote_plus(
            cast(str, media.attrib["url"]))
def get_source_publishers(self, tei_header: etree._Element):
    """
    Return the publisher persName elements of this edition, falling back
    to those of the first edition.
    """
    # BUG FIX: both queries previously selected @n = "firstEd", so the
    # "this edition" lookup was identical to the fallback and could never
    # return this edition's own publishers.  TODO(review): confirm the
    # attribute value "thisEd" against the TEI data (parallel to the
    # pubPlace @role = "thisEd"/"firstEd" convention used elsewhere).
    this_publishers = tei_header.xpath(
        'tei:fileDesc/tei:sourceDesc/tei:biblStruct/tei:monogr/'
        'tei:imprint/tei:publisher[@n = "thisEd"]/tei:persName',
        namespaces=xml_ns)
    if this_publishers:
        return this_publishers
    first_publishers = tei_header.xpath(
        'tei:fileDesc/tei:sourceDesc/tei:biblStruct/tei:monogr/'
        'tei:imprint/tei:publisher[@n = "firstEd"]/tei:persName',
        namespaces=xml_ns)
    return first_publishers
def xpath(dom: etree._Element, xpath_query: str,
          namespaces=None) -> List[etree._Element]:
    """
    Run an XPath query against ``dom``, returning an empty list when the
    document is None.
    """
    if dom is None:
        return []
    if namespaces is None:
        result = dom.xpath(xpath_query)
    else:
        result = dom.xpath(xpath_query, namespaces=namespaces)
    return cast(List[etree._Element], result)
def parse_event(
    event: etree._Element,
    event_type: backend.events.AcademicalEvent,
    activity_name: str,
    activity_id: str,
    activity_code: str,
) -> backend.events.AcademicalEvent:
    """
    Parses an element from a request into an academical event.

    An event is from an activity so information about this activity must be
    provided.

    :param event: the event element
    :type event: etree._Element
    :param event_type: the constructor used to initiate to event object
    :type event_type: Type[backend.events.AcademicalEvent]
    :param activity_name: the name of the activity
    :type activity_name: str
    :param activity_id: the id of the activity
    :type activity_id: str
    :param activity_code: the code of the activity
    :type activity_code: str
    :return: the academical event
    :rtype: backend.events.AcademicalEvent
    """
    classrooms = [
        room_to_classroom(room)
        for room in event.xpath('.//eventParticipant[@category="classroom"]')
    ]

    instructors = [
        professors.Professor(instructor.attrib["name"], None)
        for instructor in event.xpath(
            './/eventParticipant[@category="instructor"]')
    ]
    event_instructor = professors.merge_professors(instructors)

    # We create the event
    begin, end = backend.events.extract_datetime(
        event.attrib["date"], event.attrib["startHour"],
        event.attrib["endHour"])

    return event_type(
        name=activity_name,
        begin=begin,
        end=end,
        professor=event_instructor,
        classrooms=classrooms,
        id=activity_id,
        code=activity_code,
    )
def fill_out_untranscribed_commentary_markup(root: _Element):
    '''Convert abbreviated "<comm/>" and "<comm lines=4>" tags into IGNTP
    equivalent markup.

    Each <comm lines=N> is replaced by N <lb/> + commentary <note> pairs;
    a bare <comm/> is turned into a single commentary <note> in place.
    '''
    tei_ns = 'http://www.tei-c.org/ns/1.0'
    xml_ns = 'http://www.w3.org/XML/1998/namespace'
    comms = root.xpath('//tei:comm', namespaces={'tei': tei_ns})  # type: List[_Element]
    for comm in comms:
        if comm.get('lines'):
            index = comm.getparent().index(comm)
            try:
                lines = int(comm.get('lines'))
            except (TypeError, ValueError):
                # Narrowed from a bare except: only a non-numeric @lines
                # value should fall back to a single line.
                lines = 1
            for _ in range(lines):
                parent = comm.getparent()  # type: _Element
                comm_element = ET.Element('note', type='commentary',
                                          nsmap={
                                              None: tei_ns,
                                              'xml': xml_ns
                                          })
                comm_element.text = 'One line of untranscribed commentary text'
                lb = ET.Element('lb', nsmap={None: tei_ns, 'xml': xml_ns})
                # Inserting at a fixed index each pass; the pairs are
                # identical, so the resulting order is equivalent.
                parent.insert(index, lb)
                parent.insert(index + 1, comm_element)
            comm.getparent().remove(comm)
        else:
            comm.tag = 'note'
            comm.attrib['type'] = 'commentary'
            comm.text = 'untranscribed commentary text'
    return root
def add_page_break_type(root: _Element):
    '''Add type="folio" to every TEI page-break element (a minor convenience).'''
    namespaces = {'tei': 'http://www.tei-c.org/ns/1.0'}
    for page_break in root.xpath('//tei:pb', namespaces=namespaces):
        page_break.attrib['type'] = 'folio'
    return root
def get_value(
    tag_name: str,
    context_element: _Element,
    name: str,
    default: Optional[str] = None,
) -> Optional[str]:
    """
    Return a value from an nvpair

    WARNING: does not solve multiple nvsets (with the same tag_name) in the
    context_element nor multiple nvpair with the same name

    tag_name -- "instance_attributes" or "meta_attributes"
    context_element -- searched element
    name -- nvpair name
    default -- default return value
    """
    # XPath variables ($tag_name, $name) avoid any quoting/injection
    # issues with the incoming strings.
    query = """
        ./*[local-name()=$tag_name]
        /nvpair[
            @name=$name
            and
            string-length(@value) > 0
        ]
        /@value
    """
    matches = cast(
        List[str],
        context_element.xpath(query, tag_name=tag_name, name=name),
    )
    if matches:
        return matches[0]
    return default
def test_images(xhtml_file: Path, xhtml: _Element) -> bool:
    """
    Check that every 'img' link resolves to an existing local file.

    If settings.test_images is True then also use PIL to test if the image
    files are valid.

    :param xhtml_file: the XHTML file's path
    :param xhtml: the XHTML files' root
    :return: True if the images are okay
    """
    success = True
    for img in xhtml.xpath("//xhtml:img", namespaces=XMLNS):
        src = str(img.attrib["src"])
        if ":" in src:
            # A scheme is present (external URL): not checked here.
            continue
        img_path = xhtml_file.parent / Path(url2pathname(src))
        if settings.verbose:
            print("\t", img_path)
        if not img_path.is_file():
            print(f"{xhtml_file}:1:0: missing image {img_path}", file=stderr)
            success = False
        elif settings.test_images:
            try:
                Image.open(img_path).verify()
            except IOError:
                print(f"{xhtml_file}:1:0: invalid image {img_path}",
                      file=stderr)
                success = False
    return success
def _parse_content(self, html: _Element) -> str:
    """
    Flatten the text of the last <p class="txt"> element into one string.

    Text is collected from the paragraph itself, its <em> and <a> children
    (descending one level into <a>, skipping <i> icons), and every child's
    tail text.
    """
    element = html.xpath('p[@class="txt"]')[-1]
    content = []
    if element.text is not None:
        content.append(element.text.strip())
    for child in element.getchildren():
        if child.tag == 'em':
            # NOTE(review): assumes <em> always carries text; an empty
            # <em/> would raise AttributeError here -- confirm.
            content.append(child.text.strip())
        elif child.tag == 'a':
            if child.text is not None:
                content.append(child.text.strip())
            for sub_child in child.getchildren():
                if sub_child.tag == 'i':
                    # Icon elements carry no article text.
                    continue
                if sub_child.text:
                    content.append(sub_child.text.strip())
                if sub_child.tail is not None:
                    content.append(sub_child.tail.strip())
        elif child.text is not None:
            # Any other tag: take its direct text when present.
            content.append(child.text.strip())
        if child.tail is not None:
            content.append(child.tail.strip())
    return ''.join(content)
def get_citable_ancestors(self, node: etree._Element, node_type: str, mode: str):
    """
    Gets all citetrail or passagetrail ancestors of a node
    (switch modes: 'citetrail' vs 'passagetrail').

    :param node: the TEI node whose ancestors are collected
    :param node_type: category of the node ('marginal', 'anchor', 'page',
        or anything else for the default structural case)
    :param mode: 'citetrail' keeps every qualifying ancestor;
        'passagetrail' additionally requires is_passagetrail_node
    :return: the qualifying ancestors ordered from the node outwards
        (i.e. closest ancestor first)
    """
    tei_ancestors = node.xpath('ancestor::*')
    ancestors = []
    if node_type == 'marginal' or node_type == 'anchor':
        # marginals and anchors must not have p (or some other "main" node) as their parent
        for anc in tei_ancestors:
            if (mode == 'citetrail' or (mode == 'passagetrail' and self.is_passagetrail_node(anc))) \
                    and self.is_structural_node(anc):
                ancestors.append(anc)
    elif node_type == 'page':
        # within front, back, and single volumes, citable parent resolves to one of those elements for avoiding
        # collisions with identically named pb in other parts
        for anc in tei_ancestors:
            if (mode == 'citetrail' or (mode == 'passagetrail' and self.is_passagetrail_node(anc))) \
                    and exists(anc, 'self::tei:front or self::tei:back'
                               + ' or self::tei:text[1][not(@xml:id = "completeWork" or @type = "work_part")]'):
                ancestors.append(anc)
        # note: this makes all other pb appear outside of any structural hierarchy, but this should be fine
    else:
        # default: any ancestor that has a recognized node type qualifies
        for anc in tei_ancestors:
            if (mode == 'citetrail' or (mode == 'passagetrail' and self.is_passagetrail_node(anc))) \
                    and self.get_node_type(anc):
                ancestors.append(anc)
    return ancestors[::-1]  # ancestors.reverse() is not working here
def _parse_comment(self, css_url: str, html: _Element) -> str:
    """
    Reassemble obfuscated review text from a review element.

    Glyphs rendered via CSS-positioned SVG sprites are resolved back to
    text through the css/svg parsers; plain text and tails are collected
    directly.  Stops at the first nested <div>.
    """
    elements = html.xpath(
        'div[@class="main-review"]/div[contains(@class, "review-words")]')
    element = elements[0]
    comment = []
    if element.text is not None:
        comment.append(element.text.strip())
    for child in element.getchildren():
        if child.tag == 'div':
            # A nested div marks the end of the review body.
            break
        if child.tag == 'img':
            # An image's text is also None, but its tail may carry text.
            pass
        elif child.text is None:
            # Obfuscated glyph: look up its sprite position from the CSS
            # and decode the character from the SVG sheet.
            svg_url, x, y = self.css_parser.get_position(
                css_url, child.tag, child.attrib['class'])
            self.svg_parser.append_svg(svg_url)
            text = self.svg_parser.parse(svg_url, x, y)
            comment.append(text)
        if child.tail is not None:
            comment.append(child.tail.strip())
    return ''.join(comment)
def bl_parse_achievement_data(parsed: etree._Element, mode="quickplay"):
    """
    Parse the achievements section into
    {category_name: {achievement_name: unlocked_bool}}.
    Returns None when the section is absent.
    """
    built_dict = {}

    roots = parsed.xpath(".//section[@id='achievements-section']")
    if not roots:
        return
    root = roots[0]

    options = root.xpath(
        ".//select[@data-group-id='achievements']")[0].xpath(".//option")
    for option in options:
        category_name = option.text
        category_id = option.get("value")
        boxes = root.xpath(
            ".//div[@data-group-id='achievements' and @data-category-id='{0}']/ul/div/div[@data-tooltip]".format(
                category_id))
        achievements = {}
        for box in boxes:
            name = box.xpath("./div/div")[0].text
            if name == '?':
                # Sombra ARG clue, not a real achievement
                continue
            achievements[util.sanitize_string(name)] = (
                "m-disabled" not in box.get("class"))
        built_dict[category_name.lower()] = achievements
    return built_dict
def _get_lrm_rsc_op_elements(
    cib: _Element,
    resource_id: str,
    node_name: str,
    op_name: str,
    interval: Optional[str] = None,
) -> List[_Element]:
    """
    Get a lrm_rsc_op element from cib status.

    WARNING: does not solve multiple matching elements

    resource_id -- resource id whose belonging element we want to find
    node_name -- name of the node where resource is running
    op_name -- operation name (start or monitor)
    interval -- operation interval using for monitor operation selection
    """
    # Build the interval predicate only when an interval was requested.
    interval_predicate = " and @interval=$interval" if interval else ""
    query = """
        ./status/node_state[@uname=$node_name]
        /lrm/lrm_resources/lrm_resource[@id=$resource_id]
        /lrm_rsc_op[@operation=$op_name{interval}]
    """.format(interval=interval_predicate)
    found = cib.xpath(
        query,
        node_name=node_name,
        resource_id=resource_id,
        op_name=op_name,
        interval=interval or "",
    )
    return cast(List[_Element], found)
def _parse_review_count(self, html: _Element, num_font_url: str) -> int:
    """
    Extract the integer review count from the #reviewCount span,
    decoding the obfuscated digits via the number font.
    """
    count_span = html.xpath('//span[@id="reviewCount"]')[0]
    content = self._parse_number(count_span, num_font_url)
    matches = self.review_count_pattern.findall(content)
    if len(matches) != 1:
        raise Exception(f'Not found review count from {content}')
    return int(matches[0])
def _rule_to_dto(rule_el: _Element) -> CibRuleExpressionDto:
    """Convert a CIB rule element (and its children) to a DTO."""
    # The xpath method has a complicated return value, but we know our xpath
    # expression only returns elements.
    child_elements = cast(_Element, rule_el.xpath(_xpath_for_export))
    children_dto_list = [
        _tag_to_export[str(child.tag)](child) for child in child_elements
    ]
    # "and" is a documented pacemaker default
    # https://clusterlabs.org/pacemaker/doc/en-US/Pacemaker/2.0/html-single/Pacemaker_Explained/index.html#_rule_properties
    boolean_op = str(rule_el.get("boolean-op", "and"))
    # Nested rules are parenthesized in the string rendering.
    string_parts = [
        f"({dto.as_string})"
        if dto.type == CibRuleExpressionType.RULE else dto.as_string
        for dto in children_dto_list
    ]
    return CibRuleExpressionDto(
        str(rule_el.get("id", "")),
        _tag_to_type[str(rule_el.tag)],
        False,  # TODO implement is_expired
        export_attributes(rule_el, with_id=False),
        None,
        None,
        children_dto_list,
        f" {boolean_op} ".join(string_parts),
    )
def get_title(self, item: etree._Element) -> Optional[etree._Element]:
    """
    Return the first <title> descendant of ``item``, or None when the
    item has no title element.
    """
    # Fixed the annotation typo (etree._Elements -> etree._Element) and
    # replaced the loop-and-break idiom with a direct first-match lookup.
    titles = cast(List[etree._Element], item.xpath(".//title"))
    return titles[0] if titles else None
def bl_parse_achievement_data(parsed: etree._Element, mode="quickplay"):
    """
    Build {category: {achievement_name: unlocked}} from the achievements
    section of a career profile page; None when the section is missing.
    """
    result = {}
    sections = parsed.xpath(".//section[@id='achievements-section']")
    if not sections:
        return
    section = sections[0]

    selector = section.xpath(".//select[@data-group-id='achievements']")[0]
    for opt in selector.xpath(".//option"):
        cat_name = opt.text
        cat_id = opt.get("value")
        boxes = section.xpath(
            ".//div[@data-group-id='achievements' and @data-category-id='{0}']"
            "/ul/div/div[@data-tooltip]".format(cat_id))
        per_category = {}
        for box in boxes:
            ach_name = box.xpath("./div/div")[0].text
            if ach_name == "?":
                # Sombra ARG clue, not a real achievement
                continue
            unlocked = "m-disabled" not in box.get("class")
            per_category[util.sanitize_string(ach_name)] = unlocked
        result[cat_name.lower()] = per_category
    return result
def _parse_topic_name_and_url(self, html: _Element) -> "Tuple[str, str]":
    """
    Extract the topic's display name and link URL.

    :param html: the topic container element
    :return: (name, url) pair
    :raises Exception: when no <a class="name"> anchor is present
    """
    # Fixed the return annotation: ``(str, str)`` is a tuple literal, not
    # a valid type hint; the string form avoids requiring a typing import
    # at runtime in this module.
    elements = html.xpath('div/a[@class="name"]')
    if not elements:
        raise Exception('Failed to find topic name.')
    name = elements[0].text
    url = elements[0].attrib['href']
    return name, url
def xml_get_opt(
    item: XMLElement,
    path: Optional[str] = None,
    attr: Optional[str] = None,
    *,
    xpath: bool = False,
) -> Optional[str]:
    """
    Helper for LXML to get an attribute at a path from an XML element.

    Returns None if the path or attribute do not exist.
    """
    if not path:
        # No path: operate on the element itself.
        elem = item
    elif xpath:
        results = item.xpath(path)
        assert isinstance(results, list)
        if results:
            elem = results[0]
            assert isinstance(elem, XMLElement)
        else:
            elem = None
    else:
        elem = item.find(path)

    if elem is None:
        return None
    value = elem.get(attr) if attr else elem.text
    if value is None:
        return None
    if isinstance(value, bytes):
        raise ValueError("Expected result to be a str not bytes.")
    return value
def bl_parse_hero_data(parsed: etree._Element, mode="quickplay"):
    """
    Parse per-hero stat tables for the given mode into
    {hero: {"hero_stats": ..., "general_stats": ...}};
    None when the mode's container div is absent.
    """
    built_dict = {}
    mode_id = "competitive" if mode == "competitive" else "quickplay"
    roots = parsed.xpath(".//div[@id='{}']".format(mode_id))
    if not roots:
        return
    root = roots[0]

    def _rows_into(container, target):
        # Copy name -> extracted value rows into `target`, skipping averages.
        for row in container.findall(".//tbody/tr"):
            stat_name = util.sanitize_string(row[0].text)
            if 'average' in stat_name.lower():
                # No averages, ty
                continue
            target[stat_name] = util.try_extract(row[1].text)

    for hero_name, div_id in hero_data_div_ids.items():
        groups = root.xpath(
            ".//div[@data-group-id='stats' and @data-category-id='{0}']".format(
                div_id))
        if not groups:
            continue
        stat_groups = groups[0]

        # First box holds the hero-specific stats, the rest are general.
        hero_specific = {}
        _rows_into(stat_groups[0], hero_specific)

        general = {}
        for box in stat_groups[1:]:
            _rows_into(box, general)

        built_dict[hero_name] = {
            "hero_stats": hero_specific,
            "general_stats": general,
        }
    return built_dict
def bl_find_heroes(parsed: etree._Element):
    """
    Extract a hero's role, difficulty (star count) and ability
    descriptions from a hero-detail page.
    """
    star_count = len(parsed.findall(".//span[@class='star']"))
    role_text = parsed.xpath(
        ".//h4[@class='h2 hero-detail-role-name']")[0].text
    abilities = {
        descriptor[0].text: descriptor[1].text
        for descriptor in parsed.findall(
            ".//div[@class='hero-ability-descriptor']")
    }
    return {
        "role": role_text,
        "difficulty": star_count,
        "abilities": abilities,
    }
def bl_parse_hero_data(parsed: etree._Element, mode="quickplay"):
    """
    Parse per-hero stat tables for the given mode, separating averages.

    :param parsed: the parsed career-profile document
    :param mode: "competitive" or anything else (treated as "quickplay")
    :return: {hero: {"hero_stats": ..., "general_stats": ...,
        "average_stats": ...}} or None when the mode div is missing or
        reports no data
    """
    # Start the dict.
    built_dict = {}

    _root = parsed.xpath(
        ".//div[@id='{}']".format("competitive" if mode == "competitive" else "quickplay")
    )
    if not _root:
        return None

    try:
        # XPath for the `u-align-center` h6 which signifies there's no data.
        # (Removed a pointless `.format(mode)` call on a literal that has
        # no placeholders.)
        no_data = _root[0].xpath(".//ul/h6[@class='u-align-center']")[0]
    except IndexError:
        pass
    else:
        if no_data.text.strip() == "We don't have any data for this account in this mode yet.":
            return None

    for hero_name, requested_hero_div_id in hero_data_div_ids.items():
        n_dict = {}
        _stat_groups = _root[0].xpath(
            ".//div[@data-group-id='stats' and @data-category-id='{0}']"
            .format(requested_hero_div_id)
        )
        if not _stat_groups:
            continue

        stat_groups = _stat_groups[0]

        _average_stats = {}
        _t_d = {}

        # offset for subboxes
        # if there IS a hero-specific box, we need to scan all boxes from
        # offset to end because the hero-specific box is first.
        # if there is NOT, we scan all boxes later.
        subbox_offset = 0
        # .find on the assumption hero box is the *first* item
        hbtitle = stat_groups.find(".//span[@class='stat-title']").text
        if hbtitle == "Hero Specific":
            subbox_offset = 1

        # NOTE(review): when there is no "Hero Specific" box, the first
        # general box is still read into "hero_stats" here AND rescanned
        # by the general loop below -- confirm this duplication is
        # intended before changing it.
        hero_specific_box = stat_groups[0]
        trs = hero_specific_box.findall(".//tbody/tr")
        # Update the dict with [0]: [1]
        for subval in trs:
            name, value = util.sanitize_string(subval[0].text), subval[1].text
            # Put averages into average_stats
            into = _average_stats if "average" in name else _t_d
            into[name] = util.try_extract(value)
        n_dict["hero_stats"] = _t_d

        _t_d = {}
        for subbox in stat_groups[subbox_offset:]:
            trs = subbox.findall(".//tbody/tr")
            # Update the dict with [0]: [1]
            for subval in trs:
                name, value = util.sanitize_string(subval[0].text), subval[1].text
                # Put averages into average_stats
                into = _average_stats if "average" in name else _t_d
                into[name] = util.try_extract(value)
        n_dict["general_stats"] = _t_d
        n_dict["average_stats"] = _average_stats

        built_dict[hero_name] = n_dict

    return built_dict