Python parser Examples

Programming Language: Python

Namespace/Package Name: html

Class/Type: parser

Examples at hotexamples.com: 3

Python parser - 3 examples found. These are the top rated real world Python examples of html.parser extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

close(1)

feed(1)

replace(1)

Frequently Used Methods

close (1)

feed (1)

replace (1)

Example #1

Show file

File: read_only_nodes.py Project: pmills/leo-editor

def insert_read_only_node(c, p, name):
    if name == "":
        name = g.app.gui.runOpenFileDialog(
            c,
            title="Open",
            filetypes=[("All files", "*")],
        )
        c.setHeadString(p, "@read-only %s" % name)
        c.redraw()
    parse = urlparse.urlparse(name)
    try:
        if parse[0] == 'ftp':
            file = FTPurl(name)  # FTP URL
        elif parse[0] == 'http':
            file = urllib.urlopen(name)  # HTTP URL
        else:
            file = open(name, "r")  # local file
        g.es("..." + name)
        new = file.read()
        file.close()
    except IOError:  # as msg:
        # g.es("error reading %s: %s" % (name, msg))
        # g.es("...not found: " + name)
        c.setBodyString(p, "")  # Clear the body text.
        return True  # Mark the node as changed.
    else:
        ext = os.path.splitext(parse[2])[1]
        if ext.lower() in ['.htm', '.html']:
            #@+<< convert HTML to text >>
            #@+node:edream.110203113231.895: *3* << convert HTML to text >>
            fh = StringIO()
            fmt = AbstractFormatter(DumbWriter(fh))
            # the parser stores parsed data into fh (file-like handle)
            parser = HTMLParser(fmt)

            # send the HTML text to the parser
            parser.feed(new)
            parser.close()

            # now replace the old string with the parsed text
            new = fh.getvalue()
            fh.close()

            # finally, get the list of hyperlinks and append to the end of the text
            hyperlinks = parser.anchorlist
            numlinks = len(hyperlinks)
            if numlinks > 0:
                hyperlist = ['\n\n--Hyperlink list follows--']
                for i in range(numlinks):
                    hyperlist.append("\n[%d]: %s" %
                                     (i + 1, hyperlinks[i]))  # 3/26/03: was i.
                new = new + ''.join(hyperlist)
            #@-<< convert HTML to text >>
        previous = p.b
        c.setBodyString(p, new)
        changed = (g.toUnicode(new) != g.toUnicode(previous))
        if changed and previous != "":
            g.es("changed: %s" % name)  # A real change.
        return changed

Example #2

Show file

File: read_only_nodes.py Project: Armagedoom/leo-editor

def insert_read_only_node (c,p,name):
    if name=="":
        name = g.app.gui.runOpenFileDialog(
            title="Open",
            filetypes=[("All files", "*")],
        )
        c.setHeadString(p,"@read-only %s" % name)
        c.redraw()
    parse = urlparse.urlparse(name)
    try:
        if parse[0] == 'ftp':
            file = FTPurl(name)  # FTP URL
        elif parse[0] == 'http':
            file = urllib.urlopen(name)  # HTTP URL
        else:
            file = open(name,"r")  # local file
        g.es("..." + name)
        new = file.read()
        file.close()
    except IOError as msg:
        # g.es("error reading %s: %s" % (name, msg))
        # g.es("...not found: " + name)
        c.setBodyString(p,"") # Clear the body text.
        return True # Mark the node as changed.
    else:
        ext = os.path.splitext(parse[2])[1]
        if ext.lower() in ['.htm', '.html']:
            #@+<< convert HTML to text >>
            #@+node:edream.110203113231.895: *3* << convert HTML to text >>
            fh = StringIO()
            fmt = AbstractFormatter(DumbWriter(fh))
            # the parser stores parsed data into fh (file-like handle)
            parser = HTMLParser(fmt)

            # send the HTML text to the parser
            parser.feed(new)
            parser.close()

            # now replace the old string with the parsed text
            new = fh.getvalue()
            fh.close()

            # finally, get the list of hyperlinks and append to the end of the text
            hyperlinks = parser.anchorlist
            numlinks = len(hyperlinks)
            if numlinks > 0:
                hyperlist = ['\n\n--Hyperlink list follows--']
                for i in range(numlinks):
                    hyperlist.append("\n[%d]: %s" % (i+1,hyperlinks[i])) # 3/26/03: was i.
                new = new + ''.join(hyperlist)
            #@-<< convert HTML to text >>
        previous = p.b
        c.setBodyString(p,new)
        changed = (g.toUnicode(new) != g.toUnicode(previous))
        if changed and previous != "":
            g.es("changed: %s" % name) # A real change.
        return changed

Example #3

Show file

File: client.py Project: aky-01/plugin.video.tv2play

def replaceHTMLCodes(txt):
    txt = re.sub("(&#[0-9]+)([^;^0-9]+)", "\\1;\\2", txt)
    txt = HTMLParser().unescape(txt)
    txt = txt.replace("&quot;", "\"")
    txt = txt.replace("&amp;", "&")
    return txt