def insert_read_only_node(c, p, name):
    """Load the contents of *name* into the body of node *p*.

    *name* may be an ``ftp`` URL, an ``http`` URL, or anything else, which is
    treated as a local file path. When *name* is the empty string the user is
    prompted with an open-file dialog and the headline of *p* is set to
    ``@read-only <name>``.

    When the path component of *name* ends in ``.htm`` or ``.html`` the
    content is rendered to plain text and a numbered list of its hyperlinks
    is appended.

    Returns True when the body text differs from the previous body, or when
    the source could not be read (the body is cleared in that case);
    otherwise returns False.
    """
    if name == "":
        name = g.app.gui.runOpenFileDialog(
            c,
            title="Open",
            filetypes=[("All files", "*")],
        )
        c.setHeadString(p, "@read-only %s" % name)
        c.redraw()
    parse = urlparse.urlparse(name)
    try:
        scheme = parse[0]
        if scheme == 'ftp':
            source = FTPurl(name)  # FTP URL
        elif scheme == 'http':
            source = urllib.urlopen(name)  # HTTP URL
        else:
            source = open(name, "r")  # local file
        # Close the handle even when read() fails; try/finally is used rather
        # than `with` because the URL objects may not be context managers.
        try:
            g.es("..." + name)
            new = source.read()
        finally:
            source.close()
    except IOError:
        # No message is reported here; the failure is signalled only by
        # clearing the body and reporting the node as changed.
        c.setBodyString(p, "")  # Clear the body text.
        return True  # Mark the node as changed.
    else:
        ext = os.path.splitext(parse[2])[1]
        if ext.lower() in ('.htm', '.html'):
            #@+<< convert HTML to text >>
            #@+node:edream.110203113231.895: *3* << convert HTML to text >>
            fh = StringIO()
            fmt = AbstractFormatter(DumbWriter(fh))
            # The parser writes the rendered text into fh (file-like handle).
            parser = HTMLParser(fmt)
            # Send the HTML text to the parser.
            parser.feed(new)
            parser.close()
            # Replace the raw HTML with the rendered text.
            new = fh.getvalue()
            fh.close()
            # Append the list of hyperlinks, numbered from 1.
            hyperlinks = parser.anchorlist
            if hyperlinks:
                hyperlist = ['\n\n--Hyperlink list follows--']
                for number, link in enumerate(hyperlinks, 1):
                    hyperlist.append("\n[%d]: %s" % (number, link))
                new = new + ''.join(hyperlist)
            #@-<< convert HTML to text >>
        previous = p.b
        c.setBodyString(p, new)
        changed = (g.toUnicode(new) != g.toUnicode(previous))
        if changed and previous != "":
            g.es("changed: %s" % name)  # A real change.
        return changed
def insert_read_only_node(c, p, name):
    """Read *name* (URL or local path) and store its text in the body of *p*.

    Sources are selected by URL scheme: ``ftp`` goes through ``FTPurl``,
    ``http`` through ``urllib.urlopen``, and everything else is opened as a
    local file. An empty *name* triggers an open-file dialog, after which the
    headline of *p* becomes ``@read-only <name>``.

    Content whose path ends in ``.htm``/``.html`` is converted to plain text,
    followed by a numbered list of the hyperlinks it contained.

    Returns True if the new body differs from the previous one, or if the
    source could not be read (the body is then cleared); otherwise False.
    """
    if name == "":
        # NOTE(review): another definition of this function in this file
        # passes `c` as the first dialog argument; confirm which signature
        # runOpenFileDialog expects.
        name = g.app.gui.runOpenFileDialog(
            title="Open",
            filetypes=[("All files", "*")],
        )
        c.setHeadString(p, "@read-only %s" % name)
        c.redraw()
    parse = urlparse.urlparse(name)
    try:
        if parse[0] == 'ftp':
            stream = FTPurl(name)  # FTP URL
        elif parse[0] == 'http':
            stream = urllib.urlopen(name)  # HTTP URL
        else:
            stream = open(name, "r")  # local file
        # Guarantee the handle is released even if reading raises.
        try:
            g.es("..." + name)
            new = stream.read()
        finally:
            stream.close()
    except IOError:
        # The read error is not reported to the log; an empty body plus a
        # True return value is the only signal to the caller.
        c.setBodyString(p, "")  # Clear the body text.
        return True  # Mark the node as changed.
    else:
        ext = os.path.splitext(parse[2])[1]
        if ext.lower() in ('.htm', '.html'):
            #@+<< convert HTML to text >>
            #@+node:edream.110203113231.895: *3* << convert HTML to text >>
            fh = StringIO()
            fmt = AbstractFormatter(DumbWriter(fh))
            # The parser stores the rendered text in fh (file-like handle).
            parser = HTMLParser(fmt)
            # Send the HTML text to the parser.
            parser.feed(new)
            parser.close()
            # Replace the original markup with the rendered text.
            new = fh.getvalue()
            fh.close()
            # Append the collected hyperlinks as a 1-based numbered list.
            hyperlinks = parser.anchorlist
            if hyperlinks:
                hyperlist = ['\n\n--Hyperlink list follows--']
                hyperlist.extend(
                    "\n[%d]: %s" % (index, link)
                    for index, link in enumerate(hyperlinks, 1)
                )
                new = new + ''.join(hyperlist)
            #@-<< convert HTML to text >>
        previous = p.b
        c.setBodyString(p, new)
        changed = (g.toUnicode(new) != g.toUnicode(previous))
        if changed and previous != "":
            g.es("changed: %s" % name)  # A real change.
        return changed
def replaceHTMLCodes(txt):
    """Return *txt* with HTML character references decoded.

    The steps run in this order:

    1. A missing ``;`` is appended to numeric references such as ``&#39``
       when they are followed by other text, so they can be decoded.
    2. All character references are unescaped.
    3. Any ``&quot;`` or ``&amp;`` still present after that pass (for
       example from double-escaped input such as ``&amp;quot;``) is
       replaced by the literal character.
    """
    try:
        # HTMLParser.unescape was removed in Python 3.9; html.unescape is
        # its replacement (available since Python 3.4).
        from html import unescape
    except ImportError:
        # Older interpreters: use the parser method instead.
        unescape = HTMLParser().unescape
    # The second `^` inside the character class is a literal caret, kept
    # from the original pattern.
    txt = re.sub(r"(&#[0-9]+)([^;^0-9]+)", r"\1;\2", txt)
    txt = unescape(txt)
    txt = txt.replace("&quot;", "\"")
    txt = txt.replace("&amp;", "&")
    return txt