def test_ExtractUrls_GivenCraigslistTermsPage_ReturnsEmptyList(self):
     """A craigslist terms-of-use link produces no extracted URLs.

     The link is substituted for the '%url%' placeholder in ``self.body``
     and the result of ``bot.extract_urls`` is expected to have length 0.
     """
     # NOTE(review): assumes self.body is a template containing '%url%';
     # it is defined outside this view.
     terms_url = 'https://www.craigslist.org/about/terms.of.use'
     message = self.body.replace('%url%', terms_url)
     extracted = bot.extract_urls(message)
     self.assertEqual(len(extracted), 0)
 def test_ExtractUrls_GivenNonCraigslistPageEndingWithHtml_ReturnsEmptyList(
         self):
     """A non-craigslist link ending in '.html' produces no extracted URLs.

     The link is substituted for the '%url%' placeholder in ``self.body``
     and the result of ``bot.extract_urls`` is expected to have length 0.
     """
     foreign_url = 'https://www.google.com/about.html'
     message = self.body.replace('%url%', foreign_url)
     extracted = bot.extract_urls(message)
     self.assertEqual(len(extracted), 0)
 def test_ExtractUrls_GivenCraigslistSearchPage_ReturnsEmptyList(self):
     """A craigslist search-results link produces no extracted URLs.

     The link is substituted for the '%url%' placeholder in ``self.body``
     and the result of ``bot.extract_urls`` is expected to have length 0.
     """
     search_url = 'https://tampa.craigslist.org/d/for-sale/search/sss'
     message = self.body.replace('%url%', search_url)
     extracted = bot.extract_urls(message)
     self.assertEqual(len(extracted), 0)
 def test_ExtractUrls_GivenCraigslistScamsPageWithRegularHTTP_ReturnsEmptyList(
         self):
     """A plain-http craigslist scams-page link produces no extracted URLs.

     The link is substituted for the '%url%' placeholder in ``self.body``
     and the result of ``bot.extract_urls`` is expected to have length 0.
     """
     scams_url = 'http://www.craigslist.org/about/scams'
     message = self.body.replace('%url%', scams_url)
     extracted = bot.extract_urls(message)
     self.assertEqual(len(extracted), 0)
 def test_ExtractUrls_GivenForumUrl_ReturnsEmptyList(self):
     """A craigslist forums link produces no extracted URLs.

     The link is substituted for the '%url%' placeholder in ``self.body``
     and the result of ``bot.extract_urls`` is expected to have length 0.
     """
     forum_url = 'https://forums.craigslist.org/?forumID=3'
     message = self.body.replace('%url%', forum_url)
     extracted = bot.extract_urls(message)
     self.assertEqual(len(extracted), 0)
 def test_ExtractUrls_GivenMultipleUrls_ReturnsMultipleUrls(self):
     """Two different listing links in one body yield two extracted URLs.

     The first link fills the '%url%' placeholder and the second fills
     '%url2%'; ``bot.extract_urls`` is then expected to return 2 items.
     """
     # NOTE(review): assumes self.body contains both '%url%' and '%url2%'
     # placeholders; the template is defined outside this view.
     url = 'http://indianapolis.craigslist.org/bar/d/bears/6451661128.html'
     url2 = 'https://dallas.craigslist.org/ftw/zip/d/20000-pounds-free-remotes/6426178725.html'
     text = self.body.replace('%url%', url)
     # Fill the second placeholder with the second link; reusing `url` here
     # would put the same link in twice and not exercise distinct URLs.
     text = text.replace('%url2%', url2)
     self.assertEqual(len(bot.extract_urls(text)), 2)
 def test_ExtractUrls_GivenFullUrlWithOnlyHTTP_ReturnsUrl(self):
     """A single plain-http listing link yields exactly one extracted URL.

     The link is substituted for the '%url%' placeholder in ``self.body``
     and the result of ``bot.extract_urls`` is expected to have length 1.
     """
     listing_url = 'http://indianapolis.craigslist.org/bar/d/bears/6451661128.html'
     message = self.body.replace('%url%', listing_url)
     extracted = bot.extract_urls(message)
     self.assertEqual(len(extracted), 1)