def test_sources_fail(self):
    """Assert that nothing scraped from ``self.sourcesFail`` is lyrics.

    Each entry supplies a ``'url'`` and a ``'path'``; their concatenation
    is scraped with ``lyrics.scrape_lyrics_from_url`` and the result must
    be rejected by ``lyrics.is_lyrics``.
    """
    for source in self.sourcesFail:
        page_url = source['url'] + source['path']
        scraped = lyrics.scrape_lyrics_from_url(page_url)
        # These sources are very unlikely to pass unless the scraping
        # algorithm is deliberately tweaked for them.
        looks_like_lyrics = lyrics.is_lyrics(scraped)
        failure_msg = "%s => %s" % (page_url, scraped)
        self.assertFalse(looks_like_lyrics, failure_msg)
def test_sources_incomplete(self):
    """Check the sources listed in ``self.sourcesIncomplete``.

    For every entry, the page at ``s['url'] + s['path']`` is scraped and
    the result must be accepted by ``lyrics.is_lyrics``. The stricter
    ``is_lyrics_content_ok`` check against ``s['title']`` is not asserted:
    when it succeeds, a debug message is logged instead.
    """
    for s in self.sourcesIncomplete:
        url = s['url'] + s['path']
        res = lyrics.scrape_lyrics_from_url(url)
        # Pass the URL as the failure message so a failing iteration
        # identifies which source broke.
        self.assertTrue(lyrics.is_lyrics(res), url)
        # These sources may start passing the content check if the HTML
        # source evolves or after an incidental improvement in the
        # scraping algorithm; we want to be notified when that happens.
        if is_lyrics_content_ok(s['title'], res):
            log.debug('Source %s actually return valid lyrics!' % s['url'])
def test_sources_ok(self):
    """Assert that every source in ``self.sourcesOk`` yields valid lyrics.

    The page at ``'url' + 'path'`` of each entry is scraped; the result
    must be accepted by ``lyrics.is_lyrics`` and then by
    ``is_lyrics_content_ok`` for the entry's ``'title'``. The URL is used
    as the failure message of both assertions.
    """
    for source in self.sourcesOk:
        page_url = source['url'] + source['path']
        scraped = lyrics.scrape_lyrics_from_url(page_url)

        # First check: the scraped text is recognised as lyrics at all.
        recognised = lyrics.is_lyrics(scraped)
        self.assertTrue(recognised, page_url)

        # Second check: the content passes the title-based check.
        content_ok = is_lyrics_content_ok(source['title'], scraped)
        self.assertTrue(content_ok, page_url)