def clean_element(self, elem: html.Element):
        """Recursively strip noisy content from ``elem`` and its descendants.

        Processing order for one element:

        1. Script/style/form-control style tags, comments and processing
           instructions are removed from their parent and not processed
           further.
        2. ``<form>`` elements lose their ``action`` and ``onsubmit``
           attributes.
        3. An ``<a>`` whose ``href`` points at twitter.com / t.co hands its
           parent to ``remove_twitter_cluster``.
        4. ``<svg>`` elements have all children deleted; every other element
           has its children cleaned recursively.
        5. ``<div>``/``<span>`` elements are removed when ``is_empty`` says
           so; ``<div>``/``<span>``/``<a>`` elements stop here when
           ``mark_special_case`` returns a truthy value.
        6. ``<a>`` elements get their trailing whitespace trimmed.
        7. ``clean_attributes`` is applied to whatever is left.

        Args:
            elem: the element to clean; it is modified in place and may be
                detached from its parent.
        """
        tag = elem.tag

        # Comments and processing instructions are recognised by comparing
        # the tag against the corresponding etree factory.
        is_unwanted = (
            tag in ("script", "noscript", "style", "meta", "input", "iframe",
                    "select", "link", "font")
            or tag == etree.Comment
            or tag == etree.ProcessingInstruction
        )
        if is_unwanted:
            parent = elem.getparent()
            # A root/detached node has no parent it could be removed from.
            if parent is not None:
                parent.remove(elem)
            return

        if tag == "form":
            elem.attrib.pop("action", None)
            elem.attrib.pop("onsubmit", None)

        if tag == "a":
            href = elem.attrib.get("href")
            if href is not None and href.startswith(
                    ("https://twitter.com", "http://twitter.com",
                     "https://t.co")):
                parent = elem.getparent()
                if parent is not None:
                    self.remove_twitter_cluster(parent)

        if tag == "svg":
            # Drop the drawing content but keep the (now empty) svg element.
            del elem[:]
        else:
            # Iterate over a snapshot: cleaning a child may remove it from
            # ``elem``, and the child sequence must not change under the loop.
            for child in list(elem):
                self.clean_element(child)

        if tag in ("div", "span"):
            if self.is_empty(elem):
                parent = elem.getparent()
                if parent is not None:
                    parent.remove(elem)
                return
            if self.mark_special_case(elem):
                return
        elif tag == "a":
            if self.mark_special_case(elem):
                return

            # strip spaces from simple links
            if len(elem) > 0:
                # NOTE(review): this discards the whole trailing text of the
                # last child rather than only its whitespace -- confirm that
                # is intended.
                elem[-1].tail = None
            elif elem.text is not None:
                elem.text = elem.text.strip()

        self.clean_attributes(elem)
    def regularize_attrib(self, elem: html.Element, n: str):
        """Normalise the value of attribute ``n`` on ``elem`` in place.

        Rules, by attribute name:

        * ``id``: replaced with ``"[guid]"`` when ``is_guid`` accepts it.
        * ``href``:
            - a ``#``-prefixed value accepted by ``is_guid`` becomes ``"#"``;
            - a ``https://www.google.com/url?q=`` link is cut off at
              ``&ust=`` when that marker is present;
            - a ``https://twitter.com`` link whose text starts with ``@``
              causes the element itself to be removed from its parent.
        * ``src``: any ``https://www.youtube.com/`` URL is collapsed to
          ``"https://www.youtube.com"``.

        Other attribute names, and attributes that are not present on the
        element, are left alone.

        Args:
            elem: element whose attribute is inspected/modified.
            n: name of the attribute to regularise.
        """
        # .get() so that a missing attribute is a no-op rather than a KeyError.
        v = elem.attrib.get(n)
        if v is None:
            return

        if n == "id":
            if self.is_guid(v):
                elem.attrib[n] = "[guid]"
        elif n == "href":
            if v.startswith("#") and self.is_guid(v):
                elem.attrib[n] = "#"
            elif v.startswith("https://www.google.com/url?q="):
                if self.trace: logger.info(f"google >>{v}<<")
                idx = v.find("&ust=")
                if idx > 0:
                    if self.trace: logger.info("removed ust")
                    elem.attrib[n] = v[:idx]
            elif (v.startswith("https://twitter.com")
                  and elem.text is not None
                  and elem.text.startswith("@")):
                parent = elem.getparent()
                # A detached element has nothing to be removed from.
                if parent is not None:
                    parent.remove(elem)
        elif n == "src":
            if v.startswith("https://www.youtube.com/"):
                elem.attrib[n] = "https://www.youtube.com"
    def remove_twitter_cluster(self, elem: html.Element):
        """Queue a block of Twitter links for removal, if ``elem`` heads one.

        The block is recognised only when all of the following hold:

        * ``elem`` is a ``<span>``;
        * every child of ``elem`` is an ``<a>`` whose ``href`` starts with
          ``http://twitter.com``, ``https://twitter.com`` or ``https://t.co``;
        * the parent and the grandparent of ``elem`` each have one or two
          children;
        * the serialized last child of the grandparent contains the bytes
          ``> ago`` (presumably a relative timestamp such as "5 minutes ago"
          -- confirm against real pages).

        When it matches, the grandparent's text is blanked and each of its
        children is appended to ``self.to_remove``; nothing is detached here.
        In every other case the method returns without changing anything.

        Args:
            elem: candidate ``<span>`` element (the parent of a Twitter link).
        """
        if self.trace: logger.info("  twitter: check")

        # check it
        if elem is None or elem.tag != "span":
            if self.trace: logger.info("  twitter: not span")
            return

        twitter_prefixes = ("http://twitter.com", "https://twitter.com",
                            "https://t.co")
        for e in elem:
            if e.tag != "a":
                if self.trace: logger.info("  twitter: child not a")
                return
            # .get() so that a link without href is rejected, not a KeyError.
            href = e.attrib.get("href")
            if href is None:
                if self.trace: logger.info("  twitter: child no href")
                return
            if not href.startswith(twitter_prefixes):
                if self.trace: logger.info("  twitter: child bad link")
                return

        # A missing ancestor is treated as having zero children, which fails
        # the 1-or-2 children requirement below.
        p = elem.getparent()
        count = 0 if p is None else len(p)
        if count not in (1, 2):
            if self.trace: logger.info(f"  twitter: parent length ({count})")
            return
        p = p.getparent()
        count = 0 if p is None else len(p)
        if count not in (1, 2):
            if self.trace:
                logger.info(f"  twitter: parent.parent length ({count})")
            return

        # p has one or two children here, so its last child always exists.
        elem_next = p[-1]

        text = html.tostring(elem_next)
        if self.trace: logger.info(f"  twitter next >>{text}<<")
        if b"> ago" not in text:
            if self.trace: logger.info("  twitter: missing ago")
            return

        # mark it for removal
        if self.trace: logger.info("  twitter: remove")
        p.text = ""
        for e in p:
            self.to_remove.append(e)