def testPopNextURLAndMarkAsVisitedHandlesCount(self):
     """Check the order in which two seeded URLs are popped.

     Two URL rows are stored with different links_to values; the row
     seeded with the larger value is expected from the first pop and the
     other row from the second pop.
     """
     # Seed the test database; rows are added in the order listed here.
     db_session = self.database_handler.CreateSession()
     seed_rows = (
         ('http://www.microsoft.com/', 500),
         ('http://www.google.com/', 1000),
     )
     for address, inbound_links in seed_rows:
         row = URL(address, 1)
         row.links_to = inbound_links
         db_session.add(row)
     db_session.commit()

     # Pop twice from the same crawler thread and compare against the
     # expected order.
     worker = CrawlerThread(self.database_handler, None, self.url_lock)
     expected_order = (
         'http://www.google.com/',
         'http://www.microsoft.com/',
     )
     for expected_address in expected_order:
         popped = worker.PopNextURLAndMarkAsVisited()
         self.assertEqual(expected_address, popped)
 def testHandleHtmlResourceIncrementsLinksTo(self):
     """Check that HandleHtmlResource bumps links_to for a linked URL.

     A URL row is stored with links_to set to 1000, then a fake page
     containing one anchor to that URL is handed to HandleHtmlResource.
     Afterwards exactly one row for that URL must exist and its links_to
     must read 1001.
     """
     target_address = 'http://www.google.com/'

     # Seed the test database with the URL the page will link to.
     db_session = self.database_handler.CreateSession()
     seeded_row = URL(target_address, 1)
     seeded_row.links_to = 1000
     db_session.add(seeded_row)
     db_session.commit()

     # Build an in-memory stand-in for a fetched page; the handler is
     # given an object carrying a `url` attribute alongside the content.
     page_source = textwrap.dedent("""
     <a href='http://www.google.com/'>Google</a>
     """)
     fetched_page = StringIO.StringIO(page_source)
     fetched_page.url = 'http://www.test.com'

     # Run the handler under test.
     worker = CrawlerThread(self.database_handler, None, self.url_lock)
     worker.HandleHtmlResource(fetched_page)

     # Inspect the stored row through the session opened above.
     matches = db_session.query(URL).filter(URL.url == target_address)
     self.assertEqual(1, matches.count())
     self.assertEqual(1001, matches.first().links_to)