Beispiel #1
0
def crawl(url):
  """Collect the video page URLs found on every index page of a section.

  Args:
    url: URL of the section; it is passed unchanged to ``parser.index``.

  Returns:
    A single flat list made by concatenating whatever
    ``parser.video_pages`` returns for each page, in the same order as
    the pages returned by ``parser.index``.

  Raises:
    Any exception raised by ``parser.index`` or ``parser.video_pages``
    propagates to the caller.
  """
  # get an index of all the pages in the section
  pages = parser.index(url)
  result = []

  # parse each page of the section concurrently to find video page urls
  with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
    # executor.map yields results in the order of `pages`, so the returned
    # list is deterministic no matter which worker finishes first.
    for video_urls in executor.map(parser.video_pages, pages):
      result.extend(video_urls)

  # list of all the pages containing a video
  return result
Beispiel #2
0
 def test_index_not_last_page(self, foo):
   """index() called with page 290 must return the URLs of pages 1 to 303.

   NOTE(review): `foo` is unused here; presumably it is injected by a
   patch decorator that is not visible in this excerpt - confirm.
   """
   page_numbers = range(1, 303+1)
   expected = []
   for number in page_numbers:
     expected.append('http://www.jeuxvideo.com/toutes-les-videos/type-7340/?p=' + str(number))

   actual = index('http://www.jeuxvideo.com/toutes-les-videos/type-7340/?p=290')

   self.assertEqual(actual, expected)
Beispiel #3
0
 def test_index_404(self, foo):
   """index() must return an empty list for the input 'url'.

   NOTE(review): the test name suggests the fetch is stubbed to answer
   404 through `foo` (unused here) - confirm against the decorator,
   which is not visible in this excerpt.
   """
   actual = index('url')
   self.assertEqual(actual, [])