def number_of_a_descendants(element: Element):
    """
    Count the ``a`` tags located anywhere beneath the element.

    :param element: element to query, or None
    :return: number of nodes matched by ``.//a``; 0 when element is None
    """
    if element is not None:
        anchors = element.xpath('.//a')
        return len(anchors)
    return 0
def number_of_p_children(element: Element):
    """
    Count the ``p`` tags that are direct children of the element.

    :param element: element to query, or None
    :return: number of nodes matched by ``./p``; 0 when element is None
    """
    paragraphs = [] if element is None else element.xpath('./p')
    return len(paragraphs)
def number_of_p_descendants(element: Element):
    """
    Count the ``p`` tags located anywhere beneath the element.

    :param element: element to query, or None
    :return: number of nodes matched by ``.//p``; 0 when element is None
    """
    if element is None:
        count = 0
    else:
        count = len(element.xpath('.//p'))
    return count
def number_of_a_char(element: Element):
    """
    Count the linked characters, ignoring whitespace. For example, the
    result of `<a href="#">hello</a>world` is 5.

    :param element: element to query, or None
    :return: length of the whitespace-stripped text found under ``a``
        descendants; 0 when element is None
    """
    if element is None:
        return 0
    linked_fragments = element.xpath('.//a//text()')
    # Join all text nodes found inside links, then drop every whitespace run.
    compact = re.sub(r'\s*', '', ''.join(linked_fragments), flags=re.S)
    return len(compact)
def text(element: Element):
    """
    Get the text of an element with all whitespace removed.

    :param element: element whose descendant text nodes are joined, or None
    :return: concatenated text of ``.//text()`` with whitespace stripped;
        an empty string when element is None
    """
    if element is None:
        # Return an empty string rather than 0 so the result is always a str.
        return ''
    content = ''.join(element.xpath('.//text()'))
    return re.sub(r'\s*', '', content, flags=re.S)
def _has_datetime_mata(self, element: Element):
    """
    Check whether the element matches any of the datetime meta xpaths.

    :param element: element to query, or None
    :return: True if at least one xpath in DATETIME_METAS yields a truthy
        result on the element; False otherwise, including when element is None
    """
    if element is None:
        # Guard against None like the other element helpers do.
        return False
    # any() stops at the first xpath that produces a truthy result.
    return any(element.xpath(xpath) for xpath in DATETIME_METAS)
def number_of_punctuation(element: Element):
    """
    Count the punctuation characters in the text of the element.

    :param element: element to query, or None
    :return: number of characters of the whitespace-stripped text that are
        contained in PUNCTUATION; 0 when element is None
    """
    if element is None:
        return 0
    joined = ''.join(element.xpath('.//text()'))
    compact = re.sub(r'\s*', '', joined, flags=re.S)
    return sum(1 for char in compact if char in PUNCTUATION)
def a_descendants(element: Element):
    """
    Collect the ``a`` descendants of the element, re-classed as Element.

    :param element: element to query, or None
    :return: list of nodes matched by ``.//a``, each with its ``__class__``
        set to Element; an empty list when element is None
    """
    if element is None:
        return []
    anchors = list(element.xpath('.//a'))
    for anchor in anchors:
        # Rebind the node's class so callers receive Element instances.
        anchor.__class__ = Element
    return anchors
def descendants_of_body(element: Element):
    """
    Get the descendant elements of the body element.

    :param element: element used as the context for the ``//body`` query, or None
    :return: list built from ``descendants(body, True)`` for the first body
        match; an empty list when element is None or no body is found
    """
    if element is None:
        return []
    matches = element.xpath('//body')
    if not matches:
        return []
    body = matches[0]
    # Rebind the node's class to Element before handing it to descendants().
    body.__class__ = Element
    return list(descendants(body, True))
def children_of_head(element: Element):
    """
    Get the elements beneath the head element.

    :param element: element used as the context for the ``//head`` query, or None
    :return: list built from ``descendants(head, True)`` for the first head
        match; an empty list when element is None or no head is found
    """
    if element is None:
        return []
    head_matches = element.xpath('//head')
    if not head_matches:
        return []
    # xpath returns a list of matches; work on the first node, not the list
    # itself (assigning __class__ on the list raised TypeError).
    head = head_matches[0]
    head.__class__ = Element
    # NOTE(review): the function name says "children" but the original code
    # calls descendants(); kept as-is, confirm the intended helper.
    return list(descendants(head, True))
def remove_children(element: Element, xpaths):
    """
    Remove the nodes matched by the given xpaths from the element.

    :param element: element to query, or None
    :param xpaths: iterable of xpath expressions evaluated on the element;
        None or empty means nothing is removed
    :return: the same element after removal; None when element is None
    """
    if element is None:
        return None
    # With no xpaths there is nothing to remove, but the element is still
    # returned so callers can always reassign the result.
    for xpath in xpaths or ():
        for node in element.xpath(xpath):
            remove_element(node)
    return element