Esempio n. 1
0
def number_of_p_children(element: Element):
    """
    get number of p tags in children
    :param element:
    :return:
    """
    if element is None:
        return 0
    return len(element.xpath('./p'))
Esempio n. 2
0
def number_of_a_descendants(element: Element):
    """
    get number of a tags in this element
    :param element:
    :return:
    """
    if element is None:
        return 0
    return len(element.xpath('.//a'))
Esempio n. 3
0
def number_of_p_descendants(element: Element):
    """
    get number of p tags in descendants
    :param element:
    :return:
    """
    if element is None:
        return 0
    return len(element.xpath('.//p'))
Esempio n. 4
0
def siblings(element: Element, including=False):
    """
    get siblings of element
    :param element:
    :param including: include current element or not
    :return:
    """
    if element is None:
        return []
    if including:
        yield element
    for sibling in element.itersiblings(preceding=True):
        if isinstance(sibling, HtmlElement):
            sibling.__class__ = Element
            yield sibling
    for sibling in element.itersiblings(preceding=False):
        if isinstance(sibling, HtmlElement):
            sibling.__class__ = Element
            yield sibling
Esempio n. 5
0
def number_of_a_char(element: Element):
    """
    get number of linked char, for example, result of `<a href="#">hello</a>world` = 5
    :param element:
    :return: length
    """
    if element is None:
        return 0
    text = ''.join(element.xpath('.//a//text()'))
    text = re.sub(r'\s*', '', text, flags=re.S)
    return len(text)
Esempio n. 6
0
def remove_element(element: Element):
    """
    remove child element from parent
    :param element:
    :return:
    """
    if element is None:
        return
    p = element.getparent()
    if p is not None:
        p.remove(element)
Esempio n. 7
0
def text(element: Element):
    """
    get text of element
    :param element:
    :return:
    """
    if element is None:
        return 0
    text = ''.join(element.xpath('.//text()'))
    text = re.sub(r'\s*', '', text, flags=re.S)
    return text
Esempio n. 8
0
 def _has_datetime_mata(self, element: Element):
     """
     has datetime meta
     :param element:
     :return:
     """
     for xpath in DATETIME_METAS:
         datetime = element.xpath(xpath)
         if datetime:
             return True
     return False
Esempio n. 9
0
def number_of_punctuation(element: Element):
    """
    get number of punctuation of text in this element
    :param element:
    :return:
    """
    if element is None:
        return 0
    text = ''.join(element.xpath('.//text()'))
    text = re.sub(r'\s*', '', text, flags=re.S)
    punctuations = [c for c in text if c in PUNCTUATION]
    return len(punctuations)
Esempio n. 10
0
def parent(element: Element):
    """
    get parent of element
    :param element:
    :return:
    """
    if element is None:
        return None
    parent = element.getparent()
    if isinstance(parent, ModuleType):
        parent.__class__ = Element
    return parent
Esempio n. 11
0
def a_descendants(element: Element):
    """
    get
    :param element:
    :return:
    """
    if element is None:
        return []
    descendants = []
    for descendant in element.xpath('.//a'):
        descendant.__class__ = Element
        descendants.append(descendant)
    return descendants
Esempio n. 12
0
def descendants_of_body(element: Element):
    """
    get descendants element of body element
    :param element:
    :return:
    """
    if element is None:
        return []
    body_xpath = '//body'
    elements = element.xpath(body_xpath)
    if elements:
        elements[0].__class__ = Element
        return list(descendants(elements[0], True))
    return []
Esempio n. 13
0
def children_of_head(element: Element):
    """
    get children element of body element
    :param element:
    :return:
    """
    if element is None:
        return []
    body_xpath = '//head'
    body_element = element.xpath(body_xpath)
    if body_element:
        body_element.__class__ = Element
        return descendants(body_element, True)
    return []
Esempio n. 14
0
def path(element: Element):
    """
    get tag path using recursive function.
    for example result: html/body/div/div/ul/li
    :param element:
    :return:
    """
    if element is None:
        return ''
    result = path_raw(element)
    # get nth-child
    nth = len(list(element.itersiblings(preceding=True))) + 1
    result += f':nth-child({nth})' if nth != 1 else ''
    return result
Esempio n. 15
0
def descendants(element: Element, including=False):
    """
    get descendants clement of specific element
    :param element: parent element
    :param including: including current element or not
    :return:
    """
    if element is None:
        return []
    if including:
        yield element
    for descendant in element.iterdescendants():
        if isinstance(descendant, HtmlElement):
            descendant.__class__ = Element
            yield descendant
Esempio n. 16
0
def children(element: Element, including=False):
    """
    get children
    :param element:
    :param including:
    :return:
    """
    if element is None:
        return []
    if including:
        yield element
    for child in element.iterchildren():
        if isinstance(child, HtmlElement):
            child.__class__ = Element
            yield child
Esempio n. 17
0
def remove_children(element: Element, xpaths):
    """
    remove children from element
    :param element:
    :param xpaths:
    :return:
    """
    if element is None:
        return
    if not xpaths:
        return
    for xpath in xpaths:
        nodes = element.xpath(xpath)
        for node in nodes:
            remove_element(node)
    return element
Esempio n. 18
0
def alias(element: Element):
    """
    get alias of element, concat tag and attribs
    :param element:
    :return:
    """
    if element is None:
        return ''
    tag = element.tag
    attribs = [tag]
    for k, v in element.attrib.items():
        k, v = re.sub(r'\s*', '', k), re.sub(r'\s*', '', v)
        attribs.append(f'[{k}="{v}"]' if v else f'[{k}]')
    result = ''.join(attribs)
    # get nth-child
    nth = len(list(element.itersiblings(preceding=True))) + 1
    result += f'::nth-child({nth})' if nth != 1 else ''
    return result