Esempio n. 1
0
 def test_no_links(self):
     """Crawl the NO_LINKS_HTML fixture and expect one page with no assets.

     The comparison is made against the exact serialized text returned by
     ``assets_json()``, including the trailing space after each comma.
     """
     self.start_server(TestCrawler.NO_LINKS_HTML)
     spider = Crawler(TestCrawler.SERVER)
     spider.crawl()
     # One fragment per output line; joined with newlines, no trailing one.
     expected_json = "\n".join((
         '[',
         '  {',
         '    "assets": [], ',
         '    "url": "http://127.0.0.1:5000/"',
         '  }',
         ']',
     ))
     self.assertMultiLineEqual(expected_json, spider.assets_json())
Esempio n. 2
0
    def test_image_links(self):
        """Crawl the IMAGE_LINKS_HTML fixture and expect funny.gif as an asset.

        The root page is the only entry expected, and its asset list holds
        the single image URL. The serialized text is compared exactly,
        including the trailing space after each comma.
        """
        self.start_server(TestCrawler.IMAGE_LINKS_HTML)
        spider = Crawler(TestCrawler.SERVER)
        spider.crawl()
        # One fragment per output line; joined with newlines, no trailing one.
        expected_json = "\n".join((
            '[',
            '  {',
            '    "assets": [',
            '      "http://127.0.0.1:5000/funny.gif"',
            '    ], ',
            '    "url": "http://127.0.0.1:5000/"',
            '  }',
            ']',
        ))
        self.assertMultiLineEqual(expected_json, spider.assets_json())
Esempio n. 3
0
    def test_stylesheet_links(self):
        """Crawl the STYLESHEET_LINKS_HTML fixture and expect source.css as an asset.

        The root page is the only entry expected, and its asset list holds
        the single stylesheet URL. The serialized text is compared exactly,
        including the trailing space after each comma.
        """
        self.start_server(TestCrawler.STYLESHEET_LINKS_HTML)
        spider = Crawler(TestCrawler.SERVER)
        spider.crawl()
        # One fragment per output line; joined with newlines, no trailing one.
        expected_json = "\n".join((
            '[',
            '  {',
            '    "assets": [',
            '      "http://127.0.0.1:5000/source.css"',
            '    ], ',
            '    "url": "http://127.0.0.1:5000/"',
            '  }',
            ']',
        ))
        self.assertMultiLineEqual(expected_json, spider.assets_json())
Esempio n. 4
0
    def test_anchor_links(self):
        """Crawl the ANCHOR_LINKS_HTML fixture and expect two asset-less pages.

        The expected output lists the root URL followed by fetch.html, each
        with an empty asset list. The serialized text is compared exactly,
        including the trailing space after each comma.
        """
        self.start_server(TestCrawler.ANCHOR_LINKS_HTML)
        spider = Crawler(TestCrawler.SERVER)
        spider.crawl()
        # One fragment per output line; joined with newlines, no trailing one.
        expected_json = "\n".join((
            '[',
            '  {',
            '    "assets": [], ',
            '    "url": "http://127.0.0.1:5000/"',
            '  }, ',
            '  {',
            '    "assets": [], ',
            '    "url": "http://127.0.0.1:5000/fetch.html"',
            '  }',
            ']',
        ))
        self.assertMultiLineEqual(expected_json, spider.assets_json())
Esempio n. 5
0
    def test_file_links(self):
        """Crawl the FILE_LINKS_HTML fixture and expect three page entries.

        The root page is expected to list source.pdf and source.txt as
        assets, and each of those URLs is also expected as its own entry
        with an empty asset list. The serialized text is compared exactly,
        including the trailing space after each comma.
        """
        self.start_server(TestCrawler.FILE_LINKS_HTML)
        spider = Crawler(TestCrawler.SERVER)
        spider.crawl()
        # One fragment per output line; joined with newlines, no trailing one.
        expected_json = "\n".join((
            '[',
            '  {',
            '    "assets": [',
            '      "http://127.0.0.1:5000/source.pdf", ',
            '      "http://127.0.0.1:5000/source.txt"',
            '    ], ',
            '    "url": "http://127.0.0.1:5000/"',
            '  }, ',
            '  {',
            '    "assets": [], ',
            '    "url": "http://127.0.0.1:5000/source.pdf"',
            '  }, ',
            '  {',
            '    "assets": [], ',
            '    "url": "http://127.0.0.1:5000/source.txt"',
            '  }',
            ']',
        ))
        self.assertMultiLineEqual(expected_json, spider.assets_json())