Exemple #1
0
    def testParsingError(self):
        """Load the ``parse_error.html`` fixture and check the fallback results.

        ``Scraper.loadHtml`` is replaced with a stub that reads the fixture
        from the current working directory. The test expects the loaded
        scraper to report the title ``"no title"`` and to produce an empty
        thumbnail list.
        """
        url = "http://test.co.kr"

        def loadHtml(self, url):
            # Stub: return the fixture's contents; the url argument is ignored.
            with open('parse_error.html') as f:
                return f.read()

        # The stub is installed on the class itself, so the original must be
        # restored afterwards; otherwise every Scraper created by later tests
        # would keep reading this fixture.
        originalLoadHtml = Scraper.loadHtml
        Scraper.loadHtml = loadHtml
        try:
            scraper = Scraper(url).load()
            self.assertEqual("no title", scraper.title)
            thumbnailImages = scraper.makeThumbnails(
                scraper.extractImageUrls(scraper.soup, scraper.siteUrl))
            self.assertEqual([], thumbnailImages)
        finally:
            Scraper.loadHtml = originalLoadHtml
Exemple #2
0
    def testEtomato(self):
        """Check scraping of the ``news_etomato.html`` fixture.

        ``loadHtml`` is replaced on this one scraper instance with a stub that
        reads the fixture from the current working directory. The test checks
        the ``url`` and ``siteUrl`` attributes before loading, then the title
        and the thumbnail result after ``load()``.
        """
        url = "http://news.etomato.com/Home/ReadNews.aspx?no=201886"
        scraper = Scraper(url)

        def loadHtml(url):
            # Stub: return the fixture's contents; the url argument is ignored.
            # No ``self`` parameter because it is set on the instance, not the
            # class, and is therefore called as a plain function.
            with open('news_etomato.html') as f:
                return f.read()
        scraper.loadHtml = loadHtml

        self.assertEqual(url, scraper.url)
        self.assertEqual('http://news.etomato.com', scraper.siteUrl)

        scraper.load()
        expectedTitle = u'경제전문 멀티미디어 뉴스 - 뉴스 토마토 -'
        self.assertEqual(expectedTitle, scraper.title)

        thumbnailImages = scraper.makeThumbnails(
            scraper.extractImageUrls(scraper.soup, scraper.siteUrl))
        # Only the presence of a result is checked, not its contents.
        self.assertIsNotNone(thumbnailImages)