def __init__(self):
    """Set up the aggregated sgmlop parser, then the inherited state.

    An ``sgmlop.SGMLParser`` is stored on ``self._parser`` with this
    object registered as its handler. After that the mixin state is reset
    and the ``sgmllib.SGMLParser`` initialiser is run.
    """
    # The sgmlop parser is created and wired up first, so it already
    # exists when the base-class initialisation below runs.
    self._parser = sgmlop_parser = sgmlop.SGMLParser()
    sgmlop_parser.register(self)

    # Bring up both parents explicitly: mixin state, then SGMLParser.
    HtmlParserMixin.reset(self)
    sgmllib.SGMLParser.__init__(self)
def reset(self):
    """Return the parser to its initial state with a fresh sgmlop backend.

    Clears the buffered data, tag stack and parsing flags, then creates a
    new ``sgmlop.SGMLParser``, exposes its ``feed`` method directly as
    ``self.feed`` and registers this object as the parser's handler.
    """
    # Plain parsing state.
    self.rawdata = ''
    self.stack = []
    self.lasttag = '???'
    self.nomoretags = 0
    self.literal = 0

    # Fresh sgmlop parser; feed() calls go straight to it.
    backend = sgmlop.SGMLParser()
    self.parser = backend
    self.feed = backend.feed
    backend.register(self)
def __init__(self):
    """Create an sgmlop-backed parser and run the simple-parser setup.

    ``sgmlop`` is imported here rather than at module level, so an
    ImportError surfaces only when this class is instantiated.
    """
    # This module should be built already!
    import sgmlop

    sgmlop_parser = sgmlop.SGMLParser()
    self.parser = sgmlop_parser
    sgmlop_parser.register(self)

    HarvestManSimpleParser.__init__(self)

    # Parser type marker, assigned after the base initialiser has run.
    self.typ = 1
def test_sgmlop():
    """Report whether sgmlop is importable and parses a sample document.

    A small HTML snippet is fed to an ``sgmlop.SGMLParser`` and the
    ``finish_starttag`` callbacks delivered to a throw-away handler are
    counted.

    Returns:
        True if sgmlop could be imported and exactly 4 start tags were
        reported for the sample; False if the import failed or the count
        differs.
    """
    # NOTE(review): the sample contains a "< title>" tag with a stray space
    # and an href whose quotes do not match. The expected count of 4 depends
    # on this exact text, so it is presumably intentional -- keep verbatim.
    html = """\
<html>< title>Test sgmlop</title>
<body>
<p>This is a pargraph</p>
<img src="img.jpg"/>
<a href="http://www.python.org'>Python</a>
</body>
</html>
"""

    # Only the import is guarded: a missing module means "not available".
    try:
        import sgmlop
    except ImportError:
        return False

    class DummyHandler(object):
        """Record the name of every start tag reported by the parser."""

        def __init__(self):
            # Per-instance list, so no state is shared through the class.
            self.links = []

        def finish_starttag(self, tag, attrs):
            self.links.append(tag)

    handler = DummyHandler()
    parser = sgmlop.SGMLParser()
    parser.register(handler)
    parser.feed(html)

    # Working sgmlop reports exactly four start tags for the sample.
    return len(handler.links) == 4


# Capability probe, not a unit test: keep pytest/nose from collecting it
# because of its "test_" prefix.
test_sgmlop.__test__ = False