Ejemplo n.º 1
0
def number_of_a_descendants(element: Element):
    """
    get number of a tags in this element
    :param element:
    :return:
    """
    if element is None:
        return 0
    return len(element.xpath('.//a'))
Ejemplo n.º 2
0
def number_of_p_children(element: Element):
    """
    get number of p tags in children
    :param element:
    :return:
    """
    if element is None:
        return 0
    return len(element.xpath('./p'))
Ejemplo n.º 3
0
def number_of_p_descendants(element: Element):
    """
    get number of p tags
    :param element:
    :return:
    """
    if element is None:
        return 0
    return len(element.xpath('.//p'))
Ejemplo n.º 4
0
def number_of_a_char(element: Element):
    """
    get number of linked char, for example, result of `<a href="#">hello</a>world` = 5
    :param element:
    :return: length
    """
    if element is None:
        return 0
    text = ''.join(element.xpath('.//a//text()'))
    text = re.sub(r'\s*', '', text, flags=re.S)
    return len(text)
Ejemplo n.º 5
0
def text(element: Element):
    """
    get text of element
    :param element:
    :return:
    """
    if element is None:
        return 0
    text = ''.join(element.xpath('.//text()'))
    text = re.sub(r'\s*', '', text, flags=re.S)
    return text
Ejemplo n.º 6
0
 def _has_datetime_mata(self, element: Element):
     """
     has datetime meta
     :param element:
     :return:
     """
     for xpath in DATETIME_METAS:
         datetime = element.xpath(xpath)
         if datetime:
             return True
     return False
Ejemplo n.º 7
0
def number_of_punctuation(element: Element):
    """
    get number of punctuation of text in this element
    :param element:
    :return:
    """
    if element is None:
        return 0
    text = ''.join(element.xpath('.//text()'))
    text = re.sub(r'\s*', '', text, flags=re.S)
    punctuations = [c for c in text if c in PUNCTUATION]
    return len(punctuations)
Ejemplo n.º 8
0
def a_descendants(element: Element):
    """
    get
    :param element:
    :return:
    """
    if element is None:
        return []
    descendants = []
    for descendant in element.xpath('.//a'):
        descendant.__class__ = Element
        descendants.append(descendant)
    return descendants
Ejemplo n.º 9
0
def descendants_of_body(element: Element):
    """
    get descendants element of body element
    :param element:
    :return:
    """
    if element is None:
        return []
    body_xpath = '//body'
    elements = element.xpath(body_xpath)
    if elements:
        elements[0].__class__ = Element
        return list(descendants(elements[0], True))
    return []
Ejemplo n.º 10
0
def children_of_head(element: Element):
    """
    get children element of body element
    :param element:
    :return:
    """
    if element is None:
        return []
    body_xpath = '//head'
    body_element = element.xpath(body_xpath)
    if body_element:
        body_element.__class__ = Element
        return descendants(body_element, True)
    return []
Ejemplo n.º 11
0
def remove_children(element: Element, xpaths):
    """
    remove children from element
    :param element:
    :param xpaths:
    :return:
    """
    if element is None:
        return
    if not xpaths:
        return
    for xpath in xpaths:
        nodes = element.xpath(xpath)
        for node in nodes:
            remove_element(node)
    return element