def testParsingError(self):
    """Check the scraper's results for the 'parse_error.html' fixture.

    The page source is read from the local file 'parse_error.html'
    (presumably deliberately malformed HTML -- confirm against the fixture).
    The test expects the title to be "no title" and the thumbnail list to
    be empty.
    """
    url = "http://test.co.kr"

    def loadHtml(self, url):
        # Stand-in loader: ignore the URL and return the fixture contents.
        with open('parse_error.html') as f:
            return f.read()

    # The stub is installed on the class, so it would leak into every test
    # that runs afterwards. Record what Scraper itself defined (the raw
    # attribute, if any) and put it back once this test finishes.
    hadOwnLoader = 'loadHtml' in vars(Scraper)
    savedLoader = vars(Scraper).get('loadHtml')

    def restoreLoader():
        if hadOwnLoader:
            Scraper.loadHtml = savedLoader
        else:
            # The loader was inherited; removing the override re-exposes it.
            del Scraper.loadHtml

    Scraper.loadHtml = loadHtml
    self.addCleanup(restoreLoader)

    # The result of load() is used as the scraper object below.
    scraper = Scraper(url).load()
    self.assertEqual("no title", scraper.title)

    imageUrls = scraper.extractImageUrls(scraper.soup, scraper.siteUrl)
    thumbnailImages = scraper.makeThumbnails(imageUrls)
    self.assertEqual([], thumbnailImages)
def testEtomato(self):
    """Check scraping of a saved news.etomato.com article page.

    The page source is read from the local file 'news_etomato.html' through
    a stub installed on this scraper instance only. The test verifies the
    stored URL, the derived site URL, the parsed title, and that thumbnail
    generation returns something other than None.
    """
    url = "http://news.etomato.com/Home/ReadNews.aspx?no=201886"
    scraper = Scraper(url)

    def loadHtml(url):
        # Instance-level stub (no self): ignore the URL and return the
        # fixture contents.
        with open('news_etomato.html') as f:
            return f.read()

    scraper.loadHtml = loadHtml

    # These attributes are checked before load() is called.
    self.assertEqual(url, scraper.url)
    self.assertEqual('http://news.etomato.com', scraper.siteUrl)

    scraper.load()

    expectedTitle = u'경제전문 멀티미디어 뉴스 - 뉴스 토마토 -'
    self.assertEqual(expectedTitle, scraper.title)

    imageUrls = scraper.extractImageUrls(scraper.soup, scraper.siteUrl)
    thumbnailImages = scraper.makeThumbnails(imageUrls)
    self.assertIsNotNone(thumbnailImages)