def __init__(self, url):
    """Store ``url`` and build a table of canned results keyed by URL.

    Each entry in ``self.example_results`` is a dict with three keys:
    ``'failure'`` (bool), ``'parsed_data'`` (a ``MyHtmlParser`` or ``None``)
    and ``'url'`` (a string).

    NOTE(review): presumably this belongs to a test double standing in for
    a real page fetcher -- confirm against the enclosing class.
    """
    self.url = url

    not_found_url = 'http://www.example.com/404'
    gets_page_url = 'http://www.example.com/gets_page'
    found_page_url = 'http://www.example.com/found_page'
    redirecting_url = 'http://www.example.com/handles_redirects'

    # One parser is left untouched; the other carries a single link.
    blank_parser = MyHtmlParser()
    linking_parser = MyHtmlParser()
    linking_parser.links = ['http://www.example.com/found_page']

    canned = {}
    canned[not_found_url] = {
        'failure': True,
        'parsed_data': None,
        'url': 'http://www.example.com/404',
    }
    canned[gets_page_url] = {
        'failure': False,
        'parsed_data': linking_parser,
        'url': 'http://www.example.com/gets_page',
    }
    # The next two entries share the very same untouched parser object.
    canned[found_page_url] = {
        'failure': False,
        'parsed_data': blank_parser,
        'url': 'http://www.example.com/found_page',
    }
    # This entry reports a 'url' that differs from the key it is stored under.
    canned[redirecting_url] = {
        'failure': False,
        'parsed_data': blank_parser,
        'url': 'http://www.example.com/actual_page',
    }
    self.example_results = canned
def __init__(self, url):
    """Store ``url`` and build a table of canned results keyed by URL.

    Each entry in ``self.example_results`` is a dict with a ``'failure'``
    flag and a ``'results'`` value. The ``'results'`` values are of mixed
    kinds: ``None``, a list of URLs, a plain string and a ``MyHtmlParser``.

    NOTE(review): presumably this belongs to a test double standing in for
    a real page fetcher -- confirm against the enclosing class.
    """
    self.url = url

    blank_parser = MyHtmlParser()
    # NOTE(review): this second parser gets a link assigned but is never
    # placed in the table below -- confirm whether that is intentional.
    linking_parser = MyHtmlParser()
    linking_parser.links = ['http://www.example.com/found_page']

    canned = {}
    canned['http://www.example.com/404'] = {
        'failure': True,
        'results': None,
    }
    canned['http://www.example.com/gets_page'] = {
        'failure': False,
        'results': ['http://www.example.com/found_page'],
    }
    canned['http://www.example.com/found_page'] = {
        'failure': False,
        'results': 'data',
    }
    canned['http://www.example.com/handles_redirects'] = {
        'failure': False,
        'results': blank_parser,
    }
    self.example_results = canned
def GetParsedData(self):
    """Read the response body, feed it to a new ``MyHtmlParser``, return it.

    The body is decoded with the ``charset`` parameter taken from the
    response headers when that parameter is truthy; otherwise it is passed
    to the parser exactly as ``read()`` returned it.
    """
    response = self.req_response
    charset = response.headers.getparam("charset")

    # The header lookup above happens before the body is read.
    body = response.read()
    if charset:
        body = body.decode(charset)

    html_parser = MyHtmlParser()
    html_parser.feed(body)
    return html_parser
class TestParser(unittest.TestCase):
    """Check which ``MyHtmlParser`` attribute each kind of tag's URL lands in."""

    def setUp(self):
        """Give every test its own ``MyHtmlParser`` instance."""
        self.parser = MyHtmlParser()

    def test_finds_css_links(self):
        """A stylesheet ``<link>`` href should appear in ``parser.css``."""
        markup = '<link rel="stylesheet" href="/style.css" />'
        self.parser.feed(markup)
        collected = self.parser.css
        self.assertEqual(collected, ['/style.css'])

    def test_finds_js_script_links(self):
        """A ``<script>`` src should appear in ``parser.scripts``."""
        markup = '<script src="/launch.js" />'
        self.parser.feed(markup)
        collected = self.parser.scripts
        self.assertEqual(collected, ['/launch.js'])

    def test_finds_url_links(self):
        """An ``<a>`` href should appear in ``parser.links``."""
        markup = '<a href="http://www.google.com/" />'
        self.parser.feed(markup)
        collected = self.parser.links
        self.assertEqual(collected, ['http://www.google.com/'])

    def test_finds_embedded_images(self):
        """An ``<img>`` src should appear in ``parser.images``."""
        markup = '<img src="/icon.jpeg" />'
        self.parser.feed(markup)
        collected = self.parser.images
        self.assertEqual(collected, ['/icon.jpeg'])
def setUp(self):
    """Create a new ``MyHtmlParser`` and store it on ``self.parser``."""
    fresh_parser = MyHtmlParser()
    self.parser = fresh_parser