def __init__(self, url):
        """Remember ``url`` and build the table of canned per-URL results.

        ``self.example_results`` is keyed by request URL.  Each entry is a
        dict with ``'failure'`` (bool), ``'parsed_data'`` (a ``MyHtmlParser``
        instance, or ``None`` for the failing entry) and ``'url'``.
        """
        self.url = url

        not_found = 'http://www.example.com/404'
        gets_page = 'http://www.example.com/gets_page'
        found_page = 'http://www.example.com/found_page'
        redirecting = 'http://www.example.com/handles_redirects'

        # A single parser with nothing assigned to it here is shared by the
        # two entries that carry no links.
        blank_parser = MyHtmlParser()
        # This parser is given exactly one link: the found_page URL.
        linked_parser = MyHtmlParser()
        linked_parser.links = [found_page]

        canned = {}
        canned[not_found] = {
            'failure': True,
            'parsed_data': None,
            'url': not_found,
        }
        canned[gets_page] = {
            'failure': False,
            'parsed_data': linked_parser,
            'url': gets_page,
        }
        canned[found_page] = {
            'failure': False,
            'parsed_data': blank_parser,
            'url': found_page,
        }
        canned[redirecting] = {
            'failure': False,
            'parsed_data': blank_parser,
            # The recorded URL differs from the lookup key for this entry --
            # presumably modelling a redirect; confirm against the consumer.
            'url': 'http://www.example.com/actual_page',
        }
        self.example_results = canned
    def __init__(self, url):
        """Remember ``url`` and build the table of canned per-URL results.

        ``self.example_results`` is keyed by request URL.  Each entry is a
        dict with ``'failure'`` (bool) and ``'results'``; the ``'results'``
        value is ``None``, a list of URLs, a plain string or a
        ``MyHtmlParser`` instance depending on the entry.
        """
        self.url = url

        not_found = 'http://www.example.com/404'
        gets_page = 'http://www.example.com/gets_page'
        found_page = 'http://www.example.com/found_page'
        redirecting = 'http://www.example.com/handles_redirects'

        blank_parser = MyHtmlParser()
        # NOTE(review): this parser is constructed and given a link, but no
        # entry in the table below references it.
        linked_parser = MyHtmlParser()
        linked_parser.links = [found_page]

        canned = {}
        canned[not_found] = {
            'failure': True,
            'results': None,
        }
        canned[gets_page] = {
            'failure': False,
            'results': [found_page],
        }
        canned[found_page] = {
            'failure': False,
            'results': 'data',
        }
        canned[redirecting] = {
            'failure': False,
            'results': blank_parser,
        }
        self.example_results = canned
예제 #3
0
    def GetParsedData(self):
        """Parse the body of ``self.req_response`` and return the parser.

        The body is read from ``self.req_response``.  When the response
        headers report a ``charset`` parameter the body is decoded with it;
        otherwise the body is passed on exactly as read.  The text is then
        fed to a new ``MyHtmlParser``.

        Returns:
            The ``MyHtmlParser`` instance after it has been fed the body.
        """
        # Look up the charset before touching the body, then read once.
        charset = self.req_response.headers.getparam("charset")
        body = self.req_response.read()
        if charset:
            body = body.decode(charset)

        html_parser = MyHtmlParser()
        html_parser.feed(body)
        return html_parser
예제 #4
0
class TestParser(unittest.TestCase):
  """Checks which attribute of MyHtmlParser each kind of tag reference lands in."""

  def setUp(self):
    # unittest calls this before every test method, so no parser state is
    # carried over from one test to the next.
    self.parser = MyHtmlParser()

  def test_finds_css_links(self):
    """A stylesheet <link> href is collected in ``parser.css``."""
    markup = '<link rel="stylesheet" href="/style.css" />'
    self.parser.feed(markup)
    expected = ['/style.css']
    self.assertEqual(self.parser.css, expected)

  def test_finds_js_script_links(self):
    """A <script> src is collected in ``parser.scripts``."""
    markup = '<script src="/launch.js" />'
    self.parser.feed(markup)
    expected = ['/launch.js']
    self.assertEqual(self.parser.scripts, expected)

  def test_finds_url_links(self):
    """An <a> href is collected in ``parser.links``."""
    markup = '<a href="http://www.google.com/" />'
    self.parser.feed(markup)
    expected = ['http://www.google.com/']
    self.assertEqual(self.parser.links, expected)

  def test_finds_embedded_images(self):
    """An <img> src is collected in ``parser.images``."""
    markup = '<img src="/icon.jpeg" />'
    self.parser.feed(markup)
    expected = ['/icon.jpeg']
    self.assertEqual(self.parser.images, expected)
예제 #5
0
 def setUp(self):
   """Create a new ``MyHtmlParser`` and store it on ``self.parser``.

   NOTE(review): presumably a ``unittest.TestCase`` fixture hook; the
   enclosing class is not visible here -- confirm.
   """
   self.parser = MyHtmlParser()