def test_ExtractUrls_GivenCraigslistTermsPage_ReturnsEmptyList(self):
    # A link to the Craigslist terms-of-use page is dropped into the
    # '%url%' placeholder of the shared body template; the extractor is
    # expected to report no URLs for it.
    terms_link = 'https://www.craigslist.org/about/terms.of.use'
    rendered = self.body.replace('%url%', terms_link)
    found = bot.extract_urls(rendered)
    self.assertEqual(len(found), 0)
def test_ExtractUrls_GivenNonCraigslistPageEndingWithHtml_ReturnsEmptyList(
    self,
):
    # The link ends in '.html' but is hosted on google.com rather than
    # craigslist.org; the extractor is expected to report no URLs for it.
    foreign_link = 'https://www.google.com/about.html'
    rendered = self.body.replace('%url%', foreign_link)
    found = bot.extract_urls(rendered)
    self.assertEqual(len(found), 0)
def test_ExtractUrls_GivenCraigslistSearchPage_ReturnsEmptyList(self):
    # A Craigslist search-results link (path ends in '/search/sss') is
    # placed in the body; the extractor is expected to report no URLs.
    search_link = 'https://tampa.craigslist.org/d/for-sale/search/sss'
    rendered = self.body.replace('%url%', search_link)
    found = bot.extract_urls(rendered)
    self.assertEqual(len(found), 0)
def test_ExtractUrls_GivenCraigslistScamsPageWithRegularHTTP_ReturnsEmptyList(
    self,
):
    # Same idea as the other informational-page cases, but the link uses
    # plain 'http://' instead of 'https://'; still expects no URLs back.
    scams_link = 'http://www.craigslist.org/about/scams'
    rendered = self.body.replace('%url%', scams_link)
    found = bot.extract_urls(rendered)
    self.assertEqual(len(found), 0)
def test_ExtractUrls_GivenForumUrl_ReturnsEmptyList(self):
    # A link on the 'forums.' subdomain carrying a query string is placed
    # in the body; the extractor is expected to report no URLs for it.
    forum_link = 'https://forums.craigslist.org/?forumID=3'
    rendered = self.body.replace('%url%', forum_link)
    found = bot.extract_urls(rendered)
    self.assertEqual(len(found), 0)
def test_ExtractUrls_GivenMultipleUrls_ReturnsMultipleUrls(self):
    # Two different listing links are substituted into the body's
    # '%url%' and '%url2%' placeholders; the extractor is expected to
    # report exactly two URLs.
    url = 'http://indianapolis.craigslist.org/bar/d/bears/6451661128.html'
    url2 = 'https://dallas.craigslist.org/ftw/zip/d/20000-pounds-free-remotes/6426178725.html'
    text = self.body.replace('%url%', url)
    # Fill the second placeholder with the second, distinct listing so the
    # body really carries two different links rather than one link twice.
    text = text.replace('%url2%', url2)
    self.assertEqual(len(bot.extract_urls(text)), 2)
def test_ExtractUrls_GivenFullUrlWithOnlyHTTP_ReturnsUrl(self):
    # A single listing link using plain 'http://' is placed in the body;
    # the extractor is expected to report exactly one URL.
    listing_link = 'http://indianapolis.craigslist.org/bar/d/bears/6451661128.html'
    rendered = self.body.replace('%url%', listing_link)
    found = bot.extract_urls(rendered)
    self.assertEqual(len(found), 1)