Esempi in Python per HTMLParser.handle_starttag

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: html.parser

Classe/tipologia: HTMLParser

Metodo/funzione: handle_starttag

Esempi su hotexamples.com: 7

HTMLParser.handle_starttag in Python: 7 esempi trovati. Questi sono i migliori esempi reali in Python di html.parser.HTMLParser.handle_starttag, estratti da progetti open source. Puoi valutarli per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra Nascondi

HTMLParser(30)

feed(30)

reset(30)

__init__(30)

unescape(30)

close(30)

handle_data(14)

replace(8)

strip(6)

get_starttag_text(5)

handle_starttag(5)

decode(5)

split(4)

lower(4)

handle_endtag(3)

handle_comment(2)

encode(2)

handle_startendtag(2)

error(2)

fed(1)

text(1)

strict(1)

_init_(1)

closer(1)

lstrip(1)

getElementsByTagName(1)

important_tag(1)

hrefs(1)

find(1)

findLinks(1)

handle_decl(1)

convert_charrefs(1)

Esempio n. 1

Mostra file

File: rimg.py Progetto: chibiskuld/ccawmunity

    def run(self, event_pack: EventPackage):
        """Return the URL of a randomly chosen Google Images hit.

        Every element of ``event_pack.body`` after the first is used as a
        search term (the first element is skipped, presumably because it is
        the command word -- confirm against the caller). The result page is
        scanned for ``<img>`` tags and one of their ``data-src`` values is
        returned.

        :param event_pack: package whose ``body`` is a list of strings.
        :return: an image URL, or a short notice when the page contained no
            usable image.
        """
        from urllib.parse import quote_plus

        # Slice instead of pop(0) so the caller's list is left untouched.
        search_terms = event_pack.body[1:]
        # Percent-encode each term so characters such as '&' or '#' cannot
        # break or extend the query string.
        search = "sfw+" + "+".join(quote_plus(term) for term in search_terms)
        url = "https://www.google.com/search?tbm=isch&q="+search+"&oq="+search+"&gs_l=img&safesearch=on"

        # Fetch the result page with the same browser-like User-Agent as before.
        headers = {
            'User-Agent': "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
        }
        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req) as response:
            raw_page = response.read()

        # Decode the bytes; str(bytes) would feed the parser the b'...' repr
        # with backslash escapes instead of the real markup.
        page = raw_page.decode("utf-8", errors="replace")

        # Collect the data-src attribute of every <img> start tag.
        image_urls = []

        def collect_image(tag, attrs):
            if tag != "img":
                return
            image_urls.extend(
                str(value) for name, value in attrs if name == "data-src"
            )

        parser = HTMLParser()
        parser.handle_starttag = collect_image
        parser.feed(page)
        parser.close()

        if not image_urls:
            return "No images found."

        # random.choice never indexes past the end, unlike
        # randint(0, len(...)), whose upper bound is inclusive.
        return random.choice(image_urls)

Esempio n. 2

Mostra file

File: utility.py Progetto: anonaipai/ona.py

    async def source(self, ctx):
        '''Perform a reverse image search using iqdb.org.'''
        # The docstring above is left untouched on purpose: it may be shown
        # to users as the command's help text.
        url = await ctx.get_last_url()
        response = await self.ona.request("http://iqdb.org", method="POST", data={"url": url})
        body = response.decode()
        self.ona.assert_("No relevant matches" not in body, "HTTP request failed" not in body,
                         error="No results found.")

        # Gather every href attribute value, in document order.
        hrefs = []

        def handler(tag, attrs):
            hrefs.extend(value for name, value in attrs if name == "href")

        parser = HTMLParser()
        parser.handle_starttag = handler
        parser.feed(body)

        # The link at index 2 is treated as the "best match". Report a page
        # with fewer links through the same channel as the checks above
        # rather than failing with a bare IndexError.
        # NOTE(review): assumes assert_ raises when a condition is false.
        self.ona.assert_(len(hrefs) > 2, error="No results found.")
        href = hrefs[2]
        if href.startswith("//"):    # Protocol-relative link: add a scheme
            href = f"https:{href}"
        await ctx.send(f"Here's the closest match:\n{href}")

Esempio n. 3

Mostra file

 def handle_starttag(self, tag, attrs):
     """Stop collecting once a tag listed in ``self.strip_tags`` opens.

     The tag name is lower-cased before the membership check, so the
     comparison is case-insensitive as long as ``strip_tags`` holds
     lower-case names.
     """
     HTMLParser.handle_starttag(self, tag, attrs)
     lowered = tag.lower()
     if lowered not in self.strip_tags:
         return
     self.collect = False

Esempio n. 4

Mostra file

File: shared.py Progetto: wbnxvzqbrfagznggre/fuglu

 def handle_starttag(self, tag, attrs):
     """Switch ``self.collect`` off when *tag* is one of ``self.strip_tags``.

     The parent handler is invoked first; the tag name is then compared in
     lower case against ``self.strip_tags``.
     """
     HTMLParser.handle_starttag(self, tag, attrs)
     should_strip = tag.lower() in self.strip_tags
     if should_strip:
         self.collect = False

Esempio n. 5

Mostra file

File: htmlPaser_code.py Progetto: cnyan/PyDemo

 def handle_starttag(self, tag, attrs):
     """Print the name of each start tag wrapped in angle brackets.

     :param tag: tag name as reported by the parser.
     :param attrs: attribute list for the tag (not used here).
     """
     # Pass the parser instance itself to the parent handler; the builtin
     # ``set`` was being passed here by mistake.
     HTMLParser.handle_starttag(self, tag, attrs)
     print('<' + tag + '>')

Esempio n. 6

Mostra file

File: html_parse.py Progetto: edonyM/pyexer

        """
        self.warningcolor = '\033[0;37;41m'
        self.tipcolor = '\033[0;31;42m'
        self.endcolor = '\033[0m'
        self._newcolor = ''
    @property
    def new(self):
        """
        Return the user-defined colour string currently stored.
        """
        current = self._newcolor
        return current
    @new.setter
    def new(self, color_str):
        """
        Store ``color_str`` as the user-defined colour string.
        """
        setattr(self, '_newcolor', color_str)
    def disable(self):
        """
        Disable Color Print.

        Every colour attribute is blanked, not only the warning colour.
        With ``endcolor`` empty, a colour left set would be emitted with no
        reset sequence after it.
        """
        self.warningcolor = ''
        self.tipcolor = ''
        self.endcolor = ''
        self._newcolor = ''

# The base HTMLParser handlers do nothing and return None, so feeding a bare
# HTMLParser and calling handle_starttag() by hand can only print "None".
# Collecting data requires a subclass that overrides the handlers.
class _HeadingCollector(HTMLParser):
    """Collect the text found inside every <h3> element fed to the parser."""

    def __init__(self):
        super().__init__()
        self.headings = []      # one stripped string per finished <h3>
        self._parts = None      # text pieces of the <h3> being read, if any

    def handle_starttag(self, tag, attrs):
        if tag == 'h3':
            self._parts = []

    def handle_endtag(self, tag):
        if tag == 'h3' and self._parts is not None:
            self.headings.append(''.join(self._parts).strip())
            self._parts = None

    def handle_data(self, data):
        # Text is kept only while an <h3> is open.
        if self._parts is not None:
            self._parts.append(data)


source_html = requests.get(r'https://www.python.org/events/python-events/')
content = source_html.text
p = _HeadingCollector()
p.feed(content)
p.close()
print(p.headings)

Esempio n. 7

Mostra file

File: my_parser.py Progetto: paddy235/photo_tag

    def handle_starttag(self, tag, attrs):
        '''
        Handle a start tag ``<xxx>`` and push a matching node onto the tree.

        Directly-closed tags such as meta, hr and br also arrive here.

        :param tag: tag name
        :param attrs: list of (name, value) attribute pairs
        :return: None
        '''

        # Let the parent implementation see the tag and attributes first.
        HTMLParser.handle_starttag(self, tag, attrs)
        # Ignore tags listed as directly closed so they never become nodes.
        if tag in startendtag_list:
            return

        # Tag name -> name of the node class in ``wn``. The class is looked
        # up by name only for the tag at hand; unknown tags get a WebNode.
        node_class_names = {
            "p": "PNode",
            "strong": "StrongNode",
            "span": "SpanNode",
            "html": "HtmlNode",
            "head": "HeadNode",
            "body": "BodyNode",
            "style": "StyleNode",
            # NOTE(review): h1 and h3 use H2Node exactly as before -- confirm
            # whether dedicated heading node classes were intended.
            "h1": "H2Node",
            "h2": "H2Node",
            "h3": "H2Node",
            "table": "TableNode",
            "tr": "TrNode",
            "td": "TdNode",
            "ol": "OlNode",
            "li": "LiNode",
            "title": "TitleNode",
            "div": "DivNode",
            "form": "FormNode",
            "select": "SelectNode",
            "option": "OptionNode",
            "script": "ScriptNode",
            "input": "InputNode",
            "link": "LinkNode",
        }
        node = getattr(wn, node_class_names.get(tag, "WebNode"))()

        # Record the tag; the node that was current becomes the father.
        father = self.now
        node.tag = tag
        node.father = father
        if father is not None:
            # The new node sits one level below its father.
            node.index = father.index + 1
            # Path segment: tag name plus 1-based position among siblings.
            segment = "{0}[{1}]".format(tag, len(father.children) + 1)
            if father.path == "":
                # The father has no path yet, so this segment starts it.
                node.path = segment
            else:
                # Otherwise extend the father's path.
                node.path = father.path + "-" + segment
        else:
            # No father: this node is at the top level.
            node.index = 0

        # Copy the attributes onto the node.
        for name, value in attrs:
            node.attr[name] = value

        # The new node becomes the current one (descend one level).
        self.now = node
        # A tag that may lack an end tag cannot have children, so close it
        # right away and step back up to the father.
        # NOTE(review): this still fails if such a tag appears with no
        # father (top level); confirm whether that can happen.
        if self.now.tag in maybenoendtag_list:
            self.now.father.children.append(self.now)
            self.now = self.now.father