def __init__(self, http_resp):
    # Save "_parse" reference
    orig_parse = self._parse

    # Monkeypatch it!
    self._parse = lambda arg: None

    # Now call parent's __init__
    HTMLParser.__init__(self, http_resp)

    # Restore it
    self._parse = orig_parse
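A minimal sketch of how this trick could be packaged, assuming (as the snippet suggests) that HTMLParser runs its expensive _parse() from __init__; the subclass name, the stored response attribute and the explicit parse() method below are illustrative, not part of w3af:

class LazyHTMLParser(HTMLParser):
    def __init__(self, http_resp):
        # Temporarily replace _parse with a no-op so the parent's
        # __init__ returns without doing the heavy parsing work.
        orig_parse = self._parse
        self._parse = lambda arg: None
        try:
            HTMLParser.__init__(self, http_resp)
        finally:
            # Restore the real method even if __init__ raised
            self._parse = orig_parse

        # Keep the response around so parsing can run on demand
        # (hypothetical attribute, not present in the original code)
        self._lazy_http_resp = http_resp

    def parse(self):
        # Run the real parser only when the caller actually needs it
        self._parse(self._lazy_http_resp)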
def __init__(self, http_resp):
    # Create the proper parser instance, please note that
    # the order in which we ask for the type is not random,
    # first we discard the images which account for a great
    # % of the URLs in a site, then we ask for WML which is
    # a very specific thing to match, then we try text or HTML
    # which is very generic (if we would have exchanged these two
    # we would have never got to WML), etc.
    if http_resp.is_image():
        msg = 'There is no parser for images.'
        raise BaseFrameworkException(msg)
    elif self._is_wml(http_resp):
        parser = WMLParser(http_resp)
    elif http_resp.is_text_or_html():
        parser = HTMLParser(http_resp)
    elif self._is_pdf(http_resp):
        parser = PDFParser(http_resp)
    elif self._is_swf(http_resp):
        parser = SWFParser(http_resp)
    else:
        msg = 'There is no parser for "%s".' % http_resp.get_url()
        raise BaseFrameworkException(msg)

    self._parser = parser
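A hedged usage sketch, assuming this __init__ belongs to w3af's DocumentParser factory class and that BaseFrameworkException and an http_resp object are available as in the snippet above:

try:
    doc_parser = DocumentParser(http_resp)
except BaseFrameworkException:
    # Image or an unsupported content type: nothing we can parse
    doc_parser = None
else:
    # doc_parser now wraps a WMLParser, HTMLParser, PDFParser or
    # SWFParser instance, chosen in the order shown above
    pass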
def test_parse_html_performance(self):
    headers = Headers()
    headers['content-type'] = 'text/html'
    body = file(self.HTML_FILE).read()
    url = URL('http://www.w3af.org/')
    response = HTTPResponse(200, body, headers, url, url, charset='utf-8')

    #self.measure_memory(1)

    parsers = []

    for _ in xrange(40):
        p = HTMLParser(response)
        p.get_dom()
        #parsers.append(p)

        # Clear any reference to the parser
        #del p

    #parsers = []

    #self.measure_memory(2)

    time.sleep(360)
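The measure_memory() helper behind the commented-out calls is not shown here; one way to take a comparable reading before the loop and during the final sleep is a small hypothetical helper based on psutil (an assumption; the real test may use a different tool):

import os

import psutil


def print_rss(label):
    # Hypothetical helper, not w3af's measure_memory(): print the
    # resident set size of the current process so the values can be
    # compared before and after the parse loop
    rss_mib = psutil.Process(os.getpid()).memory_info().rss / (1024.0 * 1024.0)
    print('%s: %.1f MiB RSS' % (label, rss_mib))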