def reset(self, query):
    """Prepare the keyword pattern and clear the highlight flag.

    query is the list of keywords, e.g. ['yu', 'aoi'].  Every keyword is
    passed through escape() and the results are joined into one
    case-insensitive alternation whose match is exposed as the named
    group ``query``.
    """
    # NOTE(review): escape() comes from the enclosing module; confirm it is
    # a regex escape (re.escape) and not an HTML escape.
    # An empty keyword list produces an empty group, which matches the empty
    # string at every position.
    alternation = '|'.join(escape(word) for word in query)
    self.Q_PATTERN = re.compile(r'(?P<query>' + alternation + r')', re.I)
    self.highlight = 0
    BaseHTMLProcessor.reset(self)
def reset(self, query):
    """Rebuild the search pattern from *query* and reset the parser.

    query is the list of keywords, e.g. ['yu', 'aoi'].
    """
    # NOTE(review): escape() is supplied by the surrounding module; verify
    # that it escapes regex metacharacters.
    escaped_words = []
    for word in query:
        escaped_words.append(escape(word))

    # One named group holding every keyword as an alternative; re.I makes
    # the match case-insensitive.
    pattern_source = r'(?P<query>' + '|'.join(escaped_words) + r')'
    self.Q_PATTERN = re.compile(pattern_source, re.I)

    self.highlight = 0
    BaseHTMLProcessor.reset(self)
def end_a(self):
    """Handle </a> by calling the end method paired with self._start_a.

    The paired method name is the name of the current start handler with
    'start' replaced by 'end'.  When no start handler is installed the tag
    is passed to BaseHTMLProcessor.unknown_endtag instead.
    """
    # NOTE(review): the fallback is paired with the outer "is a start
    # handler installed" test; confirm against the original indentation.
    opener = self._start_a
    if not opener:
        BaseHTMLProcessor.unknown_endtag(self, "a")
        return

    closer_name = opener.__name__.replace('start', 'end')
    closer = getattr(self, closer_name, None)
    if closer:
        closer()
def reset(self):
    """Clear the handler slot and the working lists, then reset the base."""
    self._start_a = None
    # Three independent empty lists, assigned in the original order.
    self.methodQueue, self.divClassStack, self.articleRec = [], [], []
    BaseHTMLProcessor.reset(self)
def end_a(self):
    """Close an <a> element.

    With a start handler installed in self._start_a, look up the method
    whose name is the handler's name with 'start' swapped for 'end' and
    call it if it exists.  Without a start handler, defer to
    BaseHTMLProcessor.unknown_endtag.
    """
    # NOTE(review): the `else` is attached to the outer test here; the
    # flattened original does not show which `if` it belongs to -- confirm.
    if self._start_a:
        end_name = self._start_a.__name__.replace('start', 'end')
        end_method = getattr(self, end_name, None)
        if end_method:
            end_method()
    else:
        BaseHTMLProcessor.unknown_endtag(self, "a")
def unknown_endtag(self, tag):
    """Run flushcolor(), emit the end tag via the base class, re-mark.

    After the base class has handled the tag, and only while
    self.needcolor is set, self.colorindex is moved to the current end
    of self.pieces.
    """
    self.flushcolor()
    BaseHTMLProcessor.unknown_endtag(self, tag)
    if not self.needcolor:
        return
    self.colorindex = len(self.pieces)
def unknown_starttag(self, tag, attrs):
    """Run flushcolor(), emit the start tag via the base class, re-mark."""
    self.flushcolor()
    BaseHTMLProcessor.unknown_starttag(self, tag, attrs)

    # While needcolor is set, point colorindex at the current end of
    # self.pieces.
    wants_color = self.needcolor
    if wants_color:
        self.colorindex = len(self.pieces)
def __init__(self, basedir):
    """Initialise the base processor, then store *basedir* on the instance."""
    # The base initialiser runs first, exactly as in the original ordering.
    BaseHTMLProcessor.__init__(self)
    self.basedir = basedir
def reset(self):
    """Extend the ancestor's reset (called from __init__ in ancestor).

    Resets all data attributes: the verbatim counter goes back to 0
    before the base class reset runs.
    """
    self.verbatim = 0
    BaseHTMLProcessor.reset(self)
def reset(self):
    '''Extend the ancestor's reset (called from __init__ in ancestor).'''
    # Zero the verbatim counter, then let the base class reset itself.
    self.verbatim = 0
    BaseHTMLProcessor.reset(self)
def reset(self):
    """Start from empty metadata and empty stacks, then reset the base."""
    self.meta = {'data': ''}
    # div_stack keeps track of div
    self.div_stack, self.methodStack = [], []
    BaseHTMLProcessor.reset(self)
def start_a(self, attrs):
    """Dispatch an <a> start tag.

    When a handler is installed in self._start_a it receives *attrs*;
    otherwise the tag goes to BaseHTMLProcessor.unknown_starttag.
    """
    handler = self._start_a
    if not handler:
        BaseHTMLProcessor.unknown_starttag(self, "a", attrs)
        return
    handler(attrs)
def start_a(self, attrs):
    """Route an <a> start tag to self._start_a, or to the base class."""
    delegate = self._start_a
    if delegate:
        # A handler is installed: it alone processes the attributes.
        delegate(attrs)
        return
    BaseHTMLProcessor.unknown_starttag(self, "a", attrs)
def reset(self):
    """Set the verbatim counter to 0, then run the base class reset."""
    self.verbatim = 0
    BaseHTMLProcessor.reset(self)
def reset(self):
    """Reset the base class first, then zero the colouring state."""
    BaseHTMLProcessor.reset(self)
    # colorindex is assigned before needcolor, as in the original.
    self.colorindex = self.needcolor = 0
def reset(self):
    """Reinitialise meta, div_stack and methodStack, then reset the base."""
    self.meta = dict(data='')
    self.div_stack = []  # keep track of div
    self.methodStack = []
    BaseHTMLProcessor.reset(self)
#!/usr/bin/python
'''
htmlQuo.py

Feed a small sample page through BaseHTMLProcessor and print the result.

@author ffmmx
'''
from BaseHTMLProcessor import BaseHTMLProcessor

if __name__ == '__main__':
    htmlSource = '''
<html>
<head>
<title>Test page</title>
</head>
<body>
<ul>
<li><a href=index.html>Home</a></li>
<li><a href=toc.html>Table of contents</a></li>
<li><a href=history.html>Revision history</a></li>
</body>
</html>
'''
    parser = BaseHTMLProcessor()
    # feed() only drives the parse; the rebuilt document is read back with
    # output().  Printing the return value of feed() would not show it.
    # NOTE(review): assumes feed() returns None, as sgmllib.SGMLParser.feed
    # does -- confirm against BaseHTMLProcessor.
    parser.feed(htmlSource)
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print(parser.output())
def __init__(self, usefonts=0):
    """Initialise the base processor, then record the *usefonts* setting.

    usefonts defaults to 0 and is stored unchanged on the instance.
    """
    BaseHTMLProcessor.__init__(self)
    self.usefonts = usefonts
#! /usr/bin/python
"""Run a sample page through BaseHTMLProcessor and print its output."""
import sys

# BaseHTMLProcessor is looked up two directories above the working directory,
# so the path entry has to be added before the import below.
sys.path.append("../..")
from BaseHTMLProcessor import BaseHTMLProcessor

htmlSource = """
<html>
<head>
<title>Test page</title>
</head>
<body>
<ul>
<li><a href=index.html>Home</a></li>
<li><a href=toc.html>Table of contents</a></li>
<li><a href=history.html>Revision history</a></li>
</body>
</html>
"""

parser = BaseHTMLProcessor()
parser.feed(htmlSource)
print(parser.output())
def flushcolor(self):
    """Replace the pieces collected since colorindex with fontified text.

    Does nothing while self.colorindex is 0.  Otherwise the pieces from
    colorindex onward are joined, removed from self.pieces, passed through
    self.HTMLfontify and handed to BaseHTMLProcessor.handle_data;
    colorindex is reset to 0 before that call.
    """
    start = self.colorindex
    if not start:
        return

    pending = "".join(self.pieces[start:])
    self.pieces = self.pieces[:start]
    self.colorindex = 0
    BaseHTMLProcessor.handle_data(self, self.HTMLfontify(pending))
import urllib

from BaseHTMLProcessor import BaseHTMLProcessor

# Read the whole source document; the handle is closed even if read() fails.
sock = urllib.urlopen("../Programmer/html/examples.html")
try:
    htmlSource = sock.read()
finally:
    sock.close()

parser = BaseHTMLProcessor()
parser.feed(htmlSource)

# Write the processed document next to the source.  The file is closed
# explicitly so the data is flushed to disk instead of relying on
# interpreter shutdown.
f = open('../Programmer/html/examples1.html', 'w')
try:
    f.write(parser.output())
finally:
    f.close()