Python _Element.xpath Examples, lxml.etree._Element.xpath Python Examples

Example #1

0

Show file

File: ade_api.py Project: raftar2097/ADE-Scheduler

def parse_activity(
    activity: etree._Element,
) -> Tuple[List[backend.events.AcademicalEvent], str, str, str]:
    """
    Builds the events of an activity element and collects the activity's identifying fields.

    The ``name`` attribute is used as the activity id and the ``code`` attribute as the
    activity name. The activity code is the most frequent ``category5`` participant name
    found under the activity; when there is none, it is derived from the activity id.
    An empty code is replaced by ``"Other"``.

    :param activity: the activity element
    :type activity: etree._Element
    :return: the events, the name, the id and the code of this activity
    :rtype: Tuple[List[backend.events.AcademicalEvent], str, str, str]
    """
    attributes = activity.attrib
    activity_id = attributes["name"]
    activity_type = attributes["type"]
    activity_name = attributes["code"]

    event_type = backend.events.extract_type(activity_type, activity_id)
    participant_codes = activity.xpath(
        './/eventParticipant[@category="category5"]/@name')
    event_elements = activity.xpath(".//event")

    if participant_codes:
        # Keep the code that appears most often among the participants.
        activity_code = Counter(participant_codes).most_common(1)[0][0]
    else:
        activity_code = backend.events.extract_code(activity_id)
    if activity_code == "":
        activity_code = "Other"

    events_list = [
        parse_event(element, event_type, activity_name, activity_id,
                    activity_code)
        for element in event_elements
    ]

    return events_list, activity_name, activity_id, activity_code

Example #2

0

Show file

    def _parse_search_item(self, html: _Element,
                           metadata: dict) -> Optional[PeopleItem]:
        """Build a ``PeopleItem`` from one search-result node.

        The title and URL come from the first ``li/b/a`` link, the abstract
        from the text of ``li[2]`` and the publish time from the tail text of
        the first ``li[3]/a`` link, parsed as ``%Y-%m-%d %H:%M:%S``.

        :param html: the node holding one search result
        :param metadata: extra data; only its ``keyword`` entry is read
        :return: the populated item, or ``None`` when the publish time is
            missing or cannot be parsed
        :raises IndexError: if the title link or ``li[2]`` is missing
        :raises KeyError: if the title link has no ``href`` attribute
        """
        element = html.xpath('li/b/a')[0]
        title = element.text
        url = element.attrib['href']

        abstract = html.xpath('li[2]')[0].text

        try:
            publish_str = html.xpath('li[3]/a')[0].tail.strip()
            publish = datetime.datetime.strptime(publish_str,
                                                 '%Y-%m-%d %H:%M:%S')
        except (IndexError, AttributeError, ValueError):
            # IndexError: no link; AttributeError: the link has no tail text;
            # ValueError: the text is not a timestamp in the expected format.
            return None

        item = PeopleItem()
        item.title = title
        item.url = url
        item.keyword = metadata.get('keyword', '')
        item.abstract = abstract
        item.publish = publish

        return item

Example #3

0

Show file

    def _parse_search_item(self, html: _Element,
                           metadata: dict) -> Optional[CnrItem]:
        """Build a ``CnrItem`` from one search-result node.

        The title and URL come from the first ``div[1]/a`` link, the abstract
        from ``div[2]`` (empty string when absent) and the publish time from
        the tail text of the first ``span`` with class ``searchresulturl``,
        parsed as ``%Y.%m.%d %H:%M:%S``.

        :param html: the node holding one search result
        :param metadata: extra data; only its ``keyword`` entry is read
        :return: the populated item, or ``None`` when the publish time is
            missing or cannot be parsed
        :raises IndexError: if the title link is missing
        :raises KeyError: if the title link has no ``href`` attribute
        """
        element = html.xpath('div[1]/a')[0]
        title = utility.get_element_str(element)
        url = element.attrib['href']

        abstract_nodes = html.xpath('div[2]')
        abstract = ''
        if abstract_nodes:
            abstract = utility.get_element_str(abstract_nodes[0])

        try:
            date_span = html.xpath('div/span[@class="searchresulturl"]')[0]
            publish_str = date_span.tail.strip()
            publish = datetime.datetime.strptime(publish_str,
                                                 '%Y.%m.%d %H:%M:%S')
        except (IndexError, AttributeError, ValueError):
            # IndexError: no span; AttributeError: the span has no tail text;
            # ValueError: the text is not a timestamp in the expected format.
            return None

        item = CnrItem()
        item.title = title
        item.url = url
        item.abstract = abstract
        item.keyword = metadata.get('keyword', '')
        item.publish = publish

        return item

Example #4

0

Show file

File: __init__.py Project: ijknabla/OpenModelicaCompilerForPython

def is_supported_element(
    element: etree._Element,
) -> bool:
    """Tell whether an element, and everything it refers to, is supported.

    An element with a ``ref`` attribute is judged by the single element whose
    ``id`` equals that reference. A ``function`` or ``record`` element is
    supported when every node matched by ``./components/*/*`` has either one
    of the listed primitive class names or a class that resolves to a
    supported element. Any other element is supported.

    Raises ``ValueError`` when a ``ref`` does not resolve to exactly one element.
    """
    if "ref" in element.attrib:
        ref = element.attrib["ref"]
        [target] = element.xpath(f'//*[@id="{ref}"]')
        return is_supported_element(target)

    if element.tag not in {"function", "record"}:
        return True

    def valid_component(
        component: etree._Element,
    ) -> bool:
        # Only the class name matters here; the dimensions are ignored.
        className, _ = ClassNameAndDimensions.from_component(
            component
        )
        if className in {
            TypeName("Real"),
            TypeName("Integer"),
            TypeName("Boolean"),
            TypeName("String"),
            TypeName("VariableName"),
            TypeName("TypeName"),
        }:
            return True

        candidates = element.xpath(f'//*[@id="{className!s}"]')
        if not candidates:
            return False
        return is_supported_element(candidates[0])

    return all(
        valid_component(component)
        for component in element.xpath("./components/*/*")
    )

Example #5

0

Show file

    def parse_game_mode(mode: etree._Element):
        """Extract comparison bars and per-hero stat tables from a game-mode node.

        Returns a dict with two entries:

        * ``general_stats`` maps each comparison category (via ``id_stat``) to
          a dict of hero (via ``id_hero``) -> ``(description, percentage)``.
        * ``hero_stats`` maps each hero (via ``id_hero``) to a dict of
          stat-group title -> ``{stat name: stat value}``.
        """
        general_stats = {}
        comparison_categories = mode.xpath(
            './section/div/div[@data-category-id and @data-group-id="comparisons"]'
        )
        for category in comparison_categories:
            bars_by_hero = {}
            for bar in category:
                description = bar.xpath(
                    './div/div/div[@class="description"]')[0].text
                percentage = bar.get('data-overwatch-progress-percent')
                bars_by_hero[id_hero[bar.get('data-hero-guid')]] = (
                    description, percentage)
            general_stats[id_stat[category.get('data-category-id')]] = bars_by_hero

        hero_stats = {}
        hero_blocks = mode.xpath(
            './section/div/div[@data-category-id and @data-group-id="stats"]'
        )
        for hero_block in hero_blocks:
            groups = {}
            for group in hero_block:
                title = group.xpath(
                    './div/table/thead/tr/th/h5[@class="stat-title"]')[0].text
                rows = group.xpath('./div/table/tbody')[0]
                # First cell of each row is the stat name, second its value.
                groups[title] = {row[0].text: row[1].text for row in rows}
            hero_stats[id_hero[hero_block.get('data-category-id')]] = groups

        return {'general_stats': general_stats, 'hero_stats': hero_stats}

Example #6

0

Show file

    def _parse_search_item(self, html: _Element,
                           metadata: dict) -> Optional[ChinaNewsItem]:
        """Build a ``ChinaNewsItem`` from one search-result node.

        The title and URL come from the link inside the ``li`` whose class
        contains ``news_title``, the abstract from the ``li`` with class
        ``news_content``, and the publish time from the last tab-separated
        field of the ``news_other`` ``li`` text, parsed as
        ``%Y-%m-%d %H:%M:%S``.

        :param html: the node holding one search result
        :param metadata: extra data; only its ``keyword`` entry is read
        :return: the populated item, or ``None`` when the publish time is
            missing or cannot be parsed
        :raises IndexError: if the title link or the content ``li`` is missing
        :raises KeyError: if the title link has no ``href`` attribute
        """
        element = html.xpath('.//li[contains(@class, "news_title")]/a')[0]
        title = utility.get_element_str(element)
        url = element.attrib['href']

        abstract = utility.get_element_str(
            html.xpath('.//li[@class="news_content"]')[0])

        try:
            other = html.xpath('.//li[@class="news_other"]')[0]
            publish_str = other.text.strip().split('\t')[-1]
            publish = datetime.datetime.strptime(publish_str,
                                                 '%Y-%m-%d %H:%M:%S')
        except (IndexError, AttributeError, ValueError):
            # IndexError: no such li; AttributeError: the li has no text;
            # ValueError: the text is not a timestamp in the expected format.
            return None

        item = ChinaNewsItem()
        item.title = title
        item.url = url
        item.keyword = metadata.get('keyword', '')
        item.abstract = abstract
        item.publish = publish

        return item

Example #7

0

Show file

File: metadata.py Project: dvglc/svsal-factory

 def get_publish_date_range(self, date: etree._Element):
     """
     Reads a publication date, or date range, from a date element.

     :param date: the date element
     :return: for an element with a ``from`` attribute, a dict with the key ``start`` and,
         when a ``to`` attribute is present, the key ``to``; otherwise the element's first text node
     """
     if exists(date, '@from'):
         # Keep the element in `date`: rebinding it to the result dict would make
         # the `@to` lookups below run against a dict instead of the element.
         # The start of the range is read from the attribute the guard tested.
         date_range = {'start': date.xpath('@from')[0]}
         if exists(date, '@to'):
             date_range['to'] = date.xpath('@to')[0]
         return date_range
     return date.xpath('text()')[0]

Example #8

0

Show file

File: metadata.py Project: dvglc/svsal-factory

 def get_publish_date(self, tei_header: etree._Element):
     """
     Returns the publication date of the digital edition.

     A ``summaryDigitizedEd`` date element takes precedence and is resolved through
     ``get_publish_date_range``; otherwise the text of the ``digitizedEd`` date is returned.
     Raises ``IndexError`` when neither is present.
     """
     summary_dates = tei_header.xpath(
         'tei:fileDesc/tei:editionStmt/tei:edition/tei:date[@type = "summaryDigitizedEd"]',
         namespaces=xml_ns)
     digitized_texts = tei_header.xpath(
         'tei:fileDesc/tei:editionStmt/tei:edition/tei:date[@type = "digitizedEd"]/text()',
         namespaces=xml_ns)
     if summary_dates:
         return self.get_publish_date_range(summary_dates[0])
     return digitized_texts[0]

Example #9

0

Show file

File: metadata.py Project: dvglc/svsal-factory

 def get_source_publish_place(self, tei_header: etree._Element):
     """
     Returns the place name of the source's publication place.

     The ``pubPlace`` with role ``thisEd`` is preferred; the one with role ``firstEd`` is the
     fallback. Raises ``IndexError`` when neither exists.
     """
     imprint = 'tei:fileDesc/tei:sourceDesc/tei:biblStruct/tei:monogr/tei:imprint/'
     this_place = tei_header.xpath(imprint + 'tei:pubPlace[@role = "thisEd"]',
                                   namespaces=xml_ns)
     first_place = tei_header.xpath(imprint + 'tei:pubPlace[@role = "firstEd"]',
                                    namespaces=xml_ns)
     place = this_place[0] if this_place else first_place[0]
     return self.get_place_name(place)

Example #10

0

Show file

File: TextLine.py Project: PierreSenellart/theoremkb

    def get(self, line: ET._Element) -> dict:
        """Compute layout and repetition features for one ALTO ``TextLine``.

        Geometry is read from the ``HPOS``/``VPOS``/``WIDTH``/``HEIGHT``
        attributes of the line, of its enclosing ``TextBlock`` and of its
        neighbours inside that block.

        :param line: the text line element
        :return: a dict with the keys ``#line_position``, ``prev_delta_h``,
            ``next_delta_h``, ``prev_delta_v``, ``next_delta_v``,
            ``repetitive`` and ``repetitive_first``
        :raises KeyError: if ``line`` is not a ``TextLine`` element
        """
        if line.tag != f"{ALTO}TextLine":
            raise KeyError

        block = line.xpath("./ancestor::alto:TextBlock",
                           namespaces=ALTO_NS)[0]
        block_lines = block.findall(f".//{ALTO}TextLine")
        line_index = block_lines.index(line)
        last_index = len(block_lines) - 1

        line_text = misc.get_text(line).strip()

        line_h = float(line.get("HPOS"))
        line_v = float(line.get("VPOS"))
        line_w = float(line.get("WIDTH"))
        line_height = float(line.get("HEIGHT"))

        block_h = float(block.get("HPOS"))
        block_w = float(block.get("WIDTH"))

        # Bottom edge of the previous line; the first line of a block uses its
        # own top edge, so its vertical gap is zero.
        if line_index > 0:
            previous_line = block_lines[line_index - 1]
            previous_line_v = float(previous_line.get("VPOS")) + float(
                previous_line.get("HEIGHT"))
        else:
            previous_line_v = line_v

        # Top edge of the next line; the last line of a block uses its own
        # bottom edge, so its vertical gap is zero.
        if line_index < last_index:
            next_line = block_lines[line_index + 1]
            next_line_v = float(next_line.get("VPOS"))
        else:
            next_line_v = line_v + line_height

        f: Dict[str, Any] = {
            "#line_position": get_status(line, relative_to="alto:TextBlock"),
            "prev_delta_h": line_h - block_h,
            "next_delta_h": block_h + block_w - (line_h + line_w),
            "prev_delta_v": line_v - previous_line_v,
            "next_delta_v": next_line_v - (line_v + line_height),
            "repetitive": False,
            "repetitive_first": False,
        }

        # Repetition is only evaluated for the first two lines and the last
        # line of a block.
        if line_index < 2 or line_index >= last_index:
            pattern = misc.get_pattern(line_text)
            f["repetitive"] = (pattern in self.patterns) and (
                self.patterns[pattern] >= 2)
            if pattern in self.patterns:
                page = line.xpath("./ancestor::alto:Page",
                                  namespaces=ALTO_NS)[0]
                f["repetitive_first"] = self.patterns_first[
                    pattern] == page.get("PHYSICAL_IMG_NR")

        return f

Example #11

0

Show file

 def make_title(self, node: etree._Element) -> str:
     """
     Builds a title for a ``charDecl`` or ``char`` node.

     :param node: the node to make a title for
     :return: for a ``charDecl``, the string form of the English title found under the
         enclosing ``teiHeader``; for a ``char`` that has a ``desc`` child, the text of that
         description; ``None`` for any other node
     """
     # `exists` takes the context node first, as in its other uses in this file.
     if exists(node, 'self::tei:charDecl'):
         # XPath names are case-sensitive; the TEI element is `teiHeader`.
         return str(
             node.xpath(
                 'ancestor::tei:teiHeader/tei:fileDesc/tei:titleStmt/tei:title[@xml:lang = "en"]',
                 namespaces=xml_ns)[0])  # TODO i18n
     elif exists(node, 'self::tei:char/tei:desc'):
         return str(node.xpath('tei:desc/text()', namespaces=xml_ns)[0])
     else:
         return None

Example #12

0

Show file

File: extractors.py Project: suttacentral/sc-renumber-segments

 def get_ms_msdiv(cls, paragraph: _Element) -> Tuple[MsId, PaliMsDivId]:
     """
     Read the ``ms`` and ``msdiv`` anchor ids of a paragraph.

     The ``ms`` anchor is required (``IndexError`` when absent). The ``msdiv`` anchor is
     optional: when it is missing, an empty ``PaliMsDivId`` is returned and the fact is logged.
     """
     ms_anchor = paragraph.xpath("./a[@class='ms']")[0]
     ms_id = MsId.from_xml_id(ms_anchor.get("id", ""))
     # Default used when the paragraph carries no msdiv anchor.
     msdiv_id = PaliMsDivId("")
     try:
         msdiv_anchor = paragraph.xpath("./a[@class='msdiv']")[0]
         msdiv_id = PaliMsDivId(msdiv_anchor.get("id", "").strip())
     except IndexError:
         log.trace("No msdiv if for ms: '%s'", ms_id)
     return ms_id, msdiv_id

Example #13

0

Show file

    def replace_img_links(self, item: etree._Element, replace_with: str):
        """Rewrite the ``url`` attribute of enclosure and content elements.

        ``replace_with`` is a ``%``-format template that receives the
        original URL. Enclosure URLs are inserted as they are; the URLs of
        elements whose local name is ``content`` are passed through
        ``quote_plus`` first.
        """
        enclosures = cast(List[etree._Element], item.xpath(".//enclosure"))
        for enclosure in enclosures:
            enclosure.attrib["url"] = replace_with % enclosure.attrib["url"]

        # media:content tag
        media_nodes = cast(List[etree._Element],
                           item.xpath(".//*[local-name()='content'][@url]"))
        for media in media_nodes:
            original_url = cast(str, media.attrib["url"])
            media.attrib["url"] = replace_with % quote_plus(original_url)

Example #14

0

Show file

File: metadata.py Project: dvglc/svsal-factory

 def get_source_publishers(self, tei_header: etree._Element):
     """
     Returns the ``persName`` elements of the source's publishers.

     Publishers marked ``thisEd`` are preferred; those marked ``firstEd`` are the fallback.
     The result is an empty list when neither exists.
     """
     imprint = 'tei:fileDesc/tei:sourceDesc/tei:biblStruct/tei:monogr/tei:imprint/'
     # The "this edition" query must select `thisEd`; selecting `firstEd` in both
     # queries made the fallback below meaningless.
     this_publishers = tei_header.xpath(imprint + 'tei:publisher[@n = "thisEd"]/tei:persName',
                                        namespaces=xml_ns)
     first_publishers = tei_header.xpath(imprint + 'tei:publisher[@n = "firstEd"]/tei:persName',
                                         namespaces=xml_ns)
     if len(this_publishers):
         return this_publishers
     else:
         return first_publishers

Example #15

0

Show file

File: dom_utils.py Project: yawks/pyrssw

def xpath(dom: etree._Element,
          xpath_query: str,
          namespaces=None) -> List[etree._Element]:
    """Run an XPath query on ``dom``, tolerating a missing document.

    Returns an empty list when ``dom`` is ``None``. ``namespaces`` is only
    forwarded to the query when it is not ``None``.
    """
    if dom is None:
        return []

    if namespaces is None:
        found = dom.xpath(xpath_query)
    else:
        found = dom.xpath(xpath_query, namespaces=namespaces)
    return cast(List[etree._Element], found)

Example #16

0

Show file

File: ade_api.py Project: raftar2097/ADE-Scheduler

def parse_event(
    event: etree._Element,
    event_type: backend.events.AcademicalEvent,
    activity_name: str,
    activity_id: str,
    activity_code: str,
) -> backend.events.AcademicalEvent:
    """
    Turns an event element into an academical event.
    Every event belongs to an activity, so the activity's details must be passed along.

    :param event: the event element
    :type event: etree._Element
    :param event_type: the constructor called to create the event object
    :type event_type: Type[backend.events.AcademicalEvent]
    :param activity_name: the name of the activity
    :type activity_name: str
    :param activity_id: the id of the activity
    :type activity_id: str
    :param activity_code: the code of the activity
    :type activity_code: str
    :return: the academical event
    :rtype: backend.events.AcademicalEvent
    """
    attributes = event.attrib
    event_date = attributes["date"]
    event_start = attributes["startHour"]
    event_end = attributes["endHour"]

    classrooms = [
        room_to_classroom(room)
        for room in event.xpath('.//eventParticipant[@category="classroom"]')
    ]

    # Every instructor participant becomes a Professor; they are then merged
    # into the single professor attached to the event.
    instructors = [
        professors.Professor(instructor.attrib["name"], None)
        for instructor in event.xpath(
            './/eventParticipant[@category="instructor"]')
    ]
    event_instructor = professors.merge_professors(instructors)

    begin, end = backend.events.extract_datetime(event_date, event_start,
                                                 event_end)
    return event_type(
        name=activity_name,
        begin=begin,
        end=end,
        professor=event_instructor,
        classrooms=classrooms,
        id=activity_id,
        code=activity_code,
    )

Example #17

0

Show file

def fill_out_untranscribed_commentary_markup(root: _Element):
    '''convert abbreviated "<comm/>" and "<comm lines=4>" tags into IGNTP equivalent markup

    A ``comm`` element with a non-empty ``lines`` attribute is replaced by that many
    ``lb`` + ``note type="commentary"`` pairs (one pair when the attribute is not an
    integer). Any other ``comm`` element is turned into a commentary ``note`` in place.
    The tree is modified in place and ``root`` is returned.
    '''
    tei_ns = 'http://www.tei-c.org/ns/1.0'
    xml_ns = 'http://www.w3.org/XML/1998/namespace'
    comms = root.xpath('//tei:comm',
                       namespaces={'tei': tei_ns})  # type: List[_Element]
    for comm in comms:
        line_count = comm.get('lines')
        if line_count:
            parent = comm.getparent()  # type: _Element
            index = parent.index(comm)
            try:
                lines = int(line_count)
            except ValueError:
                # A non-numeric count is treated as a single line.
                lines = 1
            for _ in range(lines):
                comm_element = ET.Element('note',
                                          type='commentary',
                                          nsmap={
                                              None: tei_ns,
                                              'xml': xml_ns
                                          })
                comm_element.text = 'One line of untranscribed commentary text'
                lb = ET.Element('lb', nsmap={None: tei_ns, 'xml': xml_ns})
                # Each pair goes in front of the pairs inserted before it.
                parent.insert(index, lb)
                parent.insert(index + 1, comm_element)
            parent.remove(comm)
        else:
            comm.tag = 'note'
            comm.attrib['type'] = 'commentary'
            comm.text = 'untranscribed commentary text'
    return root

Example #18

0

Show file

def add_page_break_type(root: _Element):
    '''sets type="folio" on every TEI page break element (a minor convenience)

    The tree is modified in place and ``root`` is returned.
    '''
    namespaces = {'tei': 'http://www.tei-c.org/ns/1.0'}
    for page_break in root.xpath('//tei:pb', namespaces=namespaces):
        page_break.set('type', 'folio')
    return root

Example #19

0

Show file

File: nvpair.py Project: kmalyjur/pcs

def get_value(
    tag_name: str,
    context_element: _Element,
    name: str,
    default: Optional[str] = None,
) -> Optional[str]:
    """
    Look up the value of a named nvpair, falling back to a default

    Only nvpairs with a non-empty value are matched; the first match is
    returned.

    WARNING: does not solve multiple nvsets (with the same tag_name) in the
    context_element nor multiple nvpair with the same name

    tag_name -- "instance_attributes" or "meta_attributes"
    context_element -- searched element
    name -- nvpair name
    default -- value returned when no matching nvpair is found
    """
    matching_values = context_element.xpath(
        """
            ./*[local-name()=$tag_name]
            /nvpair[
                @name=$name and string-length(@value) > 0
            ]
            /@value
        """,
        tag_name=tag_name,
        name=name,
    )
    if matching_values:
        return cast(List[str], matching_values)[0]
    return default

Example #20

0

Show file

def test_images(xhtml_file: Path, xhtml: _Element) -> bool:
    """
    Check that no 'img' link in the document is broken.
    When settings.test_images is True, PIL is also used to
    check that each image file is valid.

    Sources containing a ':' are skipped. Problems are reported on stderr.

    :param xhtml_file: the XHTML file's path
    :param xhtml: the XHTML file's root
    :return: True if the images are okay
    """
    success = True
    for img in xhtml.xpath("//xhtml:img", namespaces=XMLNS):
        src = str(img.attrib["src"])
        if ":" in src:
            continue

        img_path = xhtml_file.parent / Path(url2pathname(src))
        if settings.verbose:
            print("\t", img_path)

        if not img_path.is_file():
            print(f"{xhtml_file}:1:0: missing image {img_path}",
                  file=stderr)
            success = False
            continue

        if not settings.test_images:
            continue

        try:
            Image.open(img_path).verify()
        except IOError:
            print(f"{xhtml_file}:1:0: invalid image {img_path}",
                  file=stderr)
            success = False
    return success

Example #21

0

Show file

    def _parse_content(self, html: _Element) -> str:
        """Flatten the last ``p`` with class ``txt`` into one stripped string.

        The paragraph's own text, the text of its children and the tail text
        after each child are stripped and concatenated in document order.
        Inside ``a`` children, ``i`` sub-elements are skipped together with
        their tail text.

        :param html: the node containing the paragraph
        :return: the concatenated content
        :raises IndexError: if no such paragraph exists
        """
        element = html.xpath('p[@class="txt"]')[-1]

        content = []
        if element.text is not None:
            content.append(element.text.strip())

        # Iterate the element directly; getchildren() is deprecated in lxml.
        for child in element:
            if child.tag == 'a':
                if child.text is not None:
                    content.append(child.text.strip())

                for sub_child in child:
                    if sub_child.tag == 'i':
                        continue

                    if sub_child.text:
                        content.append(sub_child.text.strip())

                    if sub_child.tail is not None:
                        content.append(sub_child.tail.strip())

            elif child.text is not None:
                # Covers 'em' as well: an empty <em/> is skipped instead of
                # failing on `None.strip()`.
                content.append(child.text.strip())

            if child.tail is not None:
                content.append(child.tail.strip())

        return ''.join(content)

Example #22

0

Show file

File: analysis.py Project: dvglc/svsal-factory

 def get_citable_ancestors(self, node: etree._Element, node_type: str, mode: str):
     """
     Collects the citetrail or passagetrail ancestors of a node (modes: 'citetrail' vs 'passagetrail').

     In 'passagetrail' mode an ancestor must also be a passagetrail node. Which ancestors count
     as citable depends on ``node_type``. The result is the matching ancestors in the reverse
     of the order in which the XPath query returned them.
     """
     tei_ancestors = node.xpath('ancestor::*')

     def allowed_by_mode(anc):
         return mode == 'citetrail' or (mode == 'passagetrail' and self.is_passagetrail_node(anc))

     if node_type in ('marginal', 'anchor'):
         # marginals and anchors must not have p (or some other "main" node) as their parent
         is_citable = self.is_structural_node
     elif node_type == 'page':
         # within front, back, and single volumes, citable parent resolves to one of those elements for avoiding
         # collisions with identically named pb in other parts
         # note: this makes all other pb appear outside of any structural hierarchy, but this should be fine
         def is_citable(anc):
             return exists(anc, 'self::tei:front or self::tei:back'
                                ' or self::tei:text[1][not(@xml:id = "completeWork" or @type = "work_part")]')
     else:
         is_citable = self.get_node_type

     ancestors = [anc for anc in tei_ancestors if allowed_by_mode(anc) and is_citable(anc)]
     return ancestors[::-1]

Example #23

0

Show file

    def _parse_comment(self, css_url: str, html: _Element) -> str:
        """Rebuild the review text from the ``review-words`` block.

        Plain text and tail text are stripped and concatenated. A non-image
        child without text is resolved to text through the CSS and SVG
        parsers, using its tag and ``class`` attribute. Parsing stops at the
        first ``div`` child.

        :param css_url: URL of the stylesheet used to look up glyph positions
        :param html: the node containing the review
        :return: the concatenated comment text
        :raises IndexError: if the review block is missing
        """
        review_words = html.xpath(
            'div[@class="main-review"]/div[contains(@class, "review-words")]')[0]

        comment = []
        if review_words.text is not None:
            comment.append(review_words.text.strip())

        for child in review_words:
            if child.tag == 'div':
                break

            # An image's text is also None, but its tail may still hold text,
            # so images skip the glyph lookup and fall through to the tail.
            if child.tag != 'img' and child.text is None:
                svg_url, x, y = self.css_parser.get_position(
                    css_url, child.tag, child.attrib['class'])
                self.svg_parser.append_svg(svg_url)
                comment.append(self.svg_parser.parse(svg_url, x, y))

            if child.tail is not None:
                comment.append(child.tail.strip())

        return ''.join(comment)

Example #24

0

Show file

File: parsing.py Project: azah/OWAPI

def bl_parse_achievement_data(parsed: etree._Element, mode="quickplay"):
    """Collect achievements per category from the achievements section.

    Returns ``None`` when the page has no ``achievements-section``. Otherwise
    returns a dict mapping each lower-cased category name to a dict of
    sanitized achievement name -> ``True`` unless the achievement box's class
    contains ``m-disabled``. ``mode`` is accepted but not used here.
    """
    sections = parsed.xpath(
        ".//section[@id='achievements-section']"
    )
    if not sections:
        return
    section = sections[0]

    category_select = section.xpath(".//select[@data-group-id='achievements']")[0]

    achievements = {}
    for option in category_select.xpath(".//option"):
        category_name = option.text
        category_id = option.get("value")

        boxes = section.xpath(
            ".//div[@data-group-id='achievements' and @data-category-id='{0}']/ul/div/div[@data-tooltip]".format(
                category_id))

        earned = {}
        for box in boxes:
            achievement_name = box.xpath("./div/div")[0].text
            if achievement_name == '?':
                # Sombra ARG clue, not a real achievement
                continue

            earned[util.sanitize_string(achievement_name)] = "m-disabled" not in box.get("class")

        achievements[category_name.lower()] = earned

    return achievements

Example #25

0

Show file

def _get_lrm_rsc_op_elements(
    cib: _Element,
    resource_id: str,
    node_name: str,
    op_name: str,
    interval: Optional[str] = None,
) -> List[_Element]:
    """
    Find lrm_rsc_op elements in the cib status section.

    resource_id -- id of the resource whose operation elements are wanted
    node_name -- name of the node where resource is running
    op_name -- operation name (start or monitor)
    interval -- operation interval; when given, only operations with this
        interval are matched (used for selecting monitor operations)
    """
    # The interval condition is only part of the query when an interval is
    # given; the value itself is always passed as an XPath variable.
    interval_condition = " and @interval=$interval" if interval else ""
    query = """
            ./status/node_state[@uname=$node_name]
            /lrm/lrm_resources/lrm_resource[@id=$resource_id]
            /lrm_rsc_op[@operation=$op_name{interval}]
            """.format(interval=interval_condition)
    found = cib.xpath(
        query,
        node_name=node_name,
        resource_id=resource_id,
        op_name=op_name,
        interval=interval or "",
    )
    return cast(List[_Element], found)

Example #26

0

Show file

 def _parse_review_count(self, html: _Element, num_font_url: str) -> int:
     """
     Read the review count from the ``reviewCount`` span.

     The span is decoded with ``_parse_number`` and must yield exactly one match of
     ``review_count_pattern``; otherwise an ``Exception`` is raised. ``IndexError`` is
     raised when the span is missing.
     """
     count_span = html.xpath('//span[@id="reviewCount"]')[0]
     content = self._parse_number(count_span, num_font_url)
     matches = self.review_count_pattern.findall(content)
     if len(matches) == 1:
         return int(matches[0])
     raise Exception(f'Not found review count from {content}')

Example #27

0

Show file

File: cib_to_dto.py Project: liuxiran/pcs

def _rule_to_dto(rule_el: _Element) -> CibRuleExpressionDto:
    """
    Export a rule element, including its child expressions, to a DTO

    Each child matched by _xpath_for_export is exported by the exporter
    registered for its tag. The string form of the rule joins the children's
    string forms with the rule's boolean operator; nested rules are put in
    parentheses.
    """
    children_dto_list = [
        _tag_to_export[str(child.tag)](child)
        # The xpath method has a complicated return value, but we know our xpath
        # expression only returns elements.
        for child in cast(_Element, rule_el.xpath(_xpath_for_export))
    ]
    # "and" is a documented pacemaker default
    # https://clusterlabs.org/pacemaker/doc/en-US/Pacemaker/2.0/html-single/Pacemaker_Explained/index.html#_rule_properties
    boolean_op = str(rule_el.get("boolean-op", "and"))
    string_parts = [
        f"({child_dto.as_string})"
        if child_dto.type == CibRuleExpressionType.RULE
        else child_dto.as_string
        for child_dto in children_dto_list
    ]
    return CibRuleExpressionDto(
        str(rule_el.get("id", "")),
        _tag_to_type[str(rule_el.tag)],
        False,  # TODO implement is_expired
        export_attributes(rule_el, with_id=False),
        None,
        None,
        children_dto_list,
        f" {boolean_op} ".join(string_parts),
    )

Example #28

0

Show file

    def get_title(self, item: etree._Element) -> Optional[etree._Element]:
        """Return the first ``title`` descendant of ``item``, or ``None`` if there is none."""
        titles = cast(List[etree._Element], item.xpath(".//title"))
        return titles[0] if titles else None

Example #29

0

Show file

def bl_parse_achievement_data(parsed: etree._Element, mode="quickplay"):
    """Build a ``{category: {achievement: unlocked}}`` mapping.

    Categories are the options of the achievements ``select``; their names
    are lower-cased. An achievement counts as unlocked when the class of its
    box does not contain ``m-disabled``. Returns ``None`` when the page has no
    ``achievements-section``. ``mode`` is accepted but not used here.
    """
    found = parsed.xpath(".//section[@id='achievements-section']")
    if not found:
        return
    _root = found[0]

    selects = _root.xpath(".//select[@data-group-id='achievements']")
    options = selects[0].xpath(".//option")

    result = {}
    for option in options:
        category_name = option.text
        category_id = option.get("value")

        box_query = (
            ".//div[@data-group-id='achievements' and @data-category-id='{0}']"
            "/ul/div/div[@data-tooltip]"
        ).format(category_id)

        unlocked_by_name = {}
        for box in _root.xpath(box_query):
            achievement_name = box.xpath("./div/div")[0].text
            if achievement_name == "?":
                # Sombra ARG clue, not a real achievement
                continue

            unlocked_by_name[util.sanitize_string(
                achievement_name)] = "m-disabled" not in box.get("class")

        result[category_name.lower()] = unlocked_by_name

    return result

Example #30

0

Show file

 def _parse_topic_name_and_url(self, html: _Element) -> (str, str):
     """
     Return the text and ``href`` of the first ``div/a`` link with class ``name``.

     Raises ``Exception`` when no such link exists.
     """
     links = html.xpath('div/a[@class="name"]')
     if links:
         link = links[0]
         return link.text, link.attrib['href']
     raise Exception('Failed to find topic name.')

Example #31

0

Show file

File: ingest.py Project: rjullman/board-game-search

def xml_get_opt(
    item: XMLElement,
    path: Optional[str] = None,
    attr: Optional[str] = None,
    *,
    xpath: bool = False,
) -> Optional[str]:
    """
    Helper for LXML to read an attribute, or the text, of an element at a path.

    Without a path the item itself is used. With ``xpath=True`` the path is
    evaluated as an XPath expression and its first result is used; otherwise
    the path is passed to ``find``. Without ``attr`` the element's text is
    returned.

    Returns None if the path or attribute do not exist.
    Raises ValueError if the value found is bytes.
    """
    if not path:
        elem = item
    elif not xpath:
        elem = item.find(path)
    else:
        matches = item.xpath(path)
        assert isinstance(matches, list)
        elem = matches[0] if matches else None
        assert elem is None or isinstance(elem, XMLElement)

    if elem is None:
        return None

    if attr:
        value = elem.get(attr)
    else:
        value = elem.text

    if isinstance(value, bytes):
        raise ValueError("Expected result to be a str not bytes.")
    return value

Example #32

0

Show file

File: parsing.py Project: azah/OWAPI

def bl_parse_hero_data(parsed: etree._Element, mode="quickplay"):
    """Collect per-hero stats for one game mode.

    Looks inside the ``competitive`` div when ``mode`` is ``"competitive"``
    and inside the ``quickplay`` div otherwise. Returns ``None`` when that
    div is missing. Otherwise returns a dict mapping each hero name from
    ``hero_data_div_ids`` that has a stats block to
    ``{"hero_stats": ..., "general_stats": ...}``: the first box of the block
    feeds ``hero_stats`` and the remaining boxes feed ``general_stats``.
    Rows whose name contains "average" (case-insensitive) are skipped.
    """

    def _collect(boxes):
        # Each table row contributes name (first cell) -> value (second cell).
        collected = {}
        for box in boxes:
            for row in box.findall(".//tbody/tr"):
                name, value = util.sanitize_string(row[0].text), row[1].text
                if 'average' in name.lower():
                    # No averages, ty
                    continue
                collected[name] = util.try_extract(value)
        return collected

    mode_div_id = "competitive" if mode == "competitive" else "quickplay"
    _root = parsed.xpath(".//div[@id='{}']".format(mode_div_id))
    if not _root:
        return

    heroes = {}
    for hero_name, requested_hero_div_id in hero_data_div_ids.items():
        matches = _root[0].xpath(
            ".//div[@data-group-id='stats' and @data-category-id='{0}']".format(requested_hero_div_id)
        )
        if not matches:
            continue

        stat_groups = matches[0]
        heroes[hero_name] = {
            "hero_stats": _collect([stat_groups[0]]),
            "general_stats": _collect(stat_groups[1:]),
        }

    return heroes

Example #33

0

Show file

File: parsing.py Project: Js41637/OWAPI

def bl_find_heroes(parsed: etree._Element):
    """Extract role, difficulty and abilities from a hero page.

    Difficulty is the number of ``span`` elements with class ``star``; the
    role is the text of the first role heading (``IndexError`` when missing);
    abilities map the text of each ability descriptor's first child to the
    text of its second child.
    """
    difficulty = len(parsed.findall(".//span[@class='star']"))
    role = parsed.xpath(".//h4[@class='h2 hero-detail-role-name']")[0].text
    abilities = {
        descriptor[0].text: descriptor[1].text
        for descriptor in parsed.findall(
            ".//div[@class='hero-ability-descriptor']")
    }

    return {"role": role, "difficulty": difficulty, "abilities": abilities}

Example #34

0

Show file

File: parsing.py Project: Js41637/OWAPI

def bl_parse_hero_data(parsed: etree._Element, mode="quickplay"):
    """Collect per-hero stats for one game mode.

    Looks inside the ``competitive`` div when ``mode`` is ``"competitive"``
    and inside the ``quickplay`` div otherwise.

    Returns ``None`` when that div is missing or when it carries the
    "no data" notice. Otherwise returns a dict mapping each hero name from
    ``hero_data_div_ids`` that has a stats block to a dict with the keys
    ``hero_stats``, ``general_stats`` and ``average_stats``. Rows whose
    sanitized name contains "average" go to ``average_stats``.
    """

    def _collect(boxes, stats, averages):
        # Each table row contributes name (first cell) -> value (second cell).
        for box in boxes:
            for row in box.findall(".//tbody/tr"):
                name, value = util.sanitize_string(row[0].text), row[1].text
                # Put averages into average_stats
                into = averages if "average" in name else stats
                into[name] = util.try_extract(value)

    _root = parsed.xpath(
        ".//div[@id='{}']".format("competitive" if mode == "competitive" else "quickplay")
    )
    if not _root:
        return None
    mode_root = _root[0]

    # XPath for the `u-align-center` h6 which signifies there's no data.
    no_data = mode_root.xpath(".//ul/h6[@class='u-align-center']")
    if no_data and (no_data[0].text or "").strip() == \
            "We don't have any data for this account in this mode yet.":
        return None

    built_dict = {}
    for hero_name, requested_hero_div_id in hero_data_div_ids.items():
        _stat_groups = mode_root.xpath(
            ".//div[@data-group-id='stats' and @data-category-id='{0}']"
            .format(requested_hero_div_id)
        )
        if not _stat_groups:
            continue

        stat_groups = _stat_groups[0]
        average_stats = {}
        hero_stats = {}
        general_stats = {}

        # If there IS a hero-specific box it is the first one, so the general
        # boxes start after it; otherwise every box is a general box.
        # .find on the assumption hero box is the *first* item; a block
        # without any stat title simply has no hero-specific box.
        title = stat_groups.find(".//span[@class='stat-title']")
        subbox_offset = 0
        if title is not None and title.text == "Hero Specific":
            subbox_offset = 1
            _collect([stat_groups[0]], hero_stats, average_stats)

        _collect(stat_groups[subbox_offset:], general_stats, average_stats)

        built_dict[hero_name] = {
            "hero_stats": hero_stats,
            "general_stats": general_stats,
            "average_stats": average_stats,
        }

    return built_dict