def test_sources_fail(self):
    """Check that no source in ``self.sourcesFail`` scrapes to lyrics."""
    for source in self.sourcesFail:
        page_url = source['url'] + source['path']
        scraped = lyrics.scrape_lyrics_from_url(page_url)
        # These pages are only expected to pass if the scraping algorithm
        # gets tweaked on purpose for them, which is very unlikely.
        looks_valid = lyrics.is_lyrics(scraped)
        failure_msg = "%s => %s" % (page_url, scraped)
        self.assertFalse(looks_valid, failure_msg)
def test_sources_fail(self):
    """Check that no source in ``self.sourcesFail`` scrapes to lyrics."""
    for entry in self.sourcesFail:
        target = entry['url'] + entry['path']
        text = lyrics.scrape_lyrics_from_url(target)
        # A valid result is very unlikely here unless the scraping
        # algorithm was deliberately adjusted for these cases.
        detected = lyrics.is_lyrics(text)
        details = "%s => %s" % (target, text)
        self.assertFalse(detected, details)
def test_default_ok(self):
    """Test each lyrics engine with the default query.

    Every fetcher is called with the artist and title from ``definfo``.
    The result must be accepted by ``lyrics.is_lyrics`` and by
    ``is_lyrics_content_ok`` for that title.
    """
    for fetch in (lyrics.fetch_lyricswiki, lyrics.fetch_lyricscom):
        res = fetch(definfo['artist'], definfo['title'])
        # Both engines share these assertions, so name the fetcher in the
        # failure message; otherwise a failure does not say which broke.
        engine = getattr(fetch, '__name__', repr(fetch))
        self.assertTrue(lyrics.is_lyrics(res), engine)
        self.assertTrue(is_lyrics_content_ok(definfo['title'], res), engine)
def test_default_ok(self):
    """Test each lyrics engine with the default query.

    Every fetcher is called with the artist and title from ``definfo``.
    The result must be accepted by ``lyrics.is_lyrics`` and by
    ``is_lyrics_content_ok`` for that title.
    """
    for fetch in (lyrics.fetch_lyricswiki, lyrics.fetch_lyricscom):
        res = fetch(definfo['artist'], definfo['title'])
        # Both engines share these assertions, so name the fetcher in the
        # failure message; otherwise a failure does not say which broke.
        engine = getattr(fetch, '__name__', repr(fetch))
        self.assertTrue(lyrics.is_lyrics(res), engine)
        self.assertTrue(is_lyrics_content_ok(definfo['title'], res), engine)
def test_is_lyrics(self):
    """Check that the copyright/ownership notices are rejected as lyrics."""
    notices = [
        'LyricsMania.com - Copyright (c) 2013 - All Rights Reserved',
        """All material found on this site is property\n of mywickedsongtext brand""",
    ]
    for notice in notices:
        self.assertFalse(lyrics.is_lyrics(notice))
def test_mocked_source_ok(self):
    """Test that lyrics of the mocked page are correctly scraped"""
    page_url = self.source['url'] + self.source['path']
    if not os.path.isfile(url_to_filename(page_url)):
        # No local file matches this URL, so there is nothing to check.
        return
    html = lyrics.fetch_url(page_url)
    scraped = lyrics.scrape_lyrics_from_html(html)
    self.assertTrue(lyrics.is_lyrics(scraped), page_url)
    content_ok = is_lyrics_content_ok(self.source['title'], scraped)
    self.assertTrue(content_ok, page_url)
def test_is_lyrics(self):
    """Check that the copyright/ownership notices are rejected as lyrics."""
    notices = [
        "LyricsMania.com - Copyright (c) 2013 - All Rights Reserved",
        """All material found on this site is property\n of mywickedsongtext brand""",
    ]
    for notice in notices:
        self.assertFalse(lyrics.is_lyrics(notice))
def test_sources_incomplete(self):
    """Check that each source in ``self.sourcesIncomplete`` scrapes to lyrics.

    The scraped text must be accepted by ``lyrics.is_lyrics``. Its content
    is not required to be valid; a debug message is logged when it is.
    """
    for s in self.sourcesIncomplete:
        url = s['url'] + s['path']
        res = lyrics.scrape_lyrics_from_url(url)
        # Pass the URL as the failure message so a failing source can be
        # identified, as the other per-source tests do.
        self.assertTrue(lyrics.is_lyrics(res), url)
        # These sources may start passing if the HTML source evolves or
        # after an incidental improvement of the scraping algorithm: we
        # want to be notified when that happens.
        if is_lyrics_content_ok(s['title'], res):
            log.debug('Source %s actually return valid lyrics!' % s['url'])
def test_default_ok(self):
    """Test default engines with the default query.

    The test is skipped when ``check_lyrics_fetched()`` is false. Each
    fetcher is paired with an entry of ``DEFAULT_SOURCES`` and is only
    exercised when the local file for that entry's URL exists.
    """
    if not check_lyrics_fetched():
        self.skipTest("Run lyrics_download_samples.py script first.")
    fetchers = (lyrics.fetch_lyricswiki, lyrics.fetch_lyricscom)
    for fun, s in zip(fetchers, DEFAULT_SOURCES):
        url = s['url'] + s['path']
        if not os.path.isfile(url_to_filename(url)):
            continue
        res = fun(s['artist'], s['title'])
        # Report the URL on failure so the failing engine is identifiable.
        self.assertTrue(lyrics.is_lyrics(res), url)
        self.assertTrue(
            is_lyrics_content_ok(DEFAULT_SONG['title'], res), url)
def test_google_sources_ok(self):
    """Test if lyrics present on websites registered in beets google custom
    search engine are correctly scraped."""
    if not check_lyrics_fetched():
        self.skipTest("Run lyrics_download_samples.py script first.")
    for source in GOOGLE_SOURCES:
        page_url = source['url'] + source['path']
        if not os.path.isfile(url_to_filename(page_url)):
            # No local file for this URL: move on to the next source.
            continue
        html = lyrics.fetch_url(page_url)
        scraped = lyrics.scrape_lyrics_from_html(html)
        self.assertTrue(lyrics.is_lyrics(scraped), page_url)
        content_ok = is_lyrics_content_ok(source['title'], scraped)
        self.assertTrue(content_ok, page_url)
def test_google_sources_ok(self):
    """Test if lyrics present on websites registered in beets google custom
    search engine are correctly scraped."""
    if not check_lyrics_fetched():
        self.skipTest("Run lyrics_download_samples.py script first.")
    for entry in GOOGLE_SOURCES:
        target = entry['url'] + entry['path']
        if not os.path.isfile(url_to_filename(target)):
            # No local file for this URL: move on to the next source.
            continue
        page = lyrics.fetch_url(target)
        text = lyrics.scrape_lyrics_from_html(page)
        self.assertTrue(lyrics.is_lyrics(text), target)
        valid_content = is_lyrics_content_ok(entry['title'], text)
        self.assertTrue(valid_content, target)
def test_sources_incomplete(self):
    """Check that each source in ``self.sourcesIncomplete`` scrapes to lyrics.

    The scraped text must be accepted by ``lyrics.is_lyrics``. Its content
    is not required to be valid; a debug message is logged when it is.
    """
    for s in self.sourcesIncomplete:
        url = s['url'] + s['path']
        res = lyrics.scrape_lyrics_from_url(url)
        # Pass the URL as the failure message so a failing source can be
        # identified, as the other per-source tests do.
        self.assertTrue(lyrics.is_lyrics(res), url)
        # These sources may start passing if the HTML source evolves or
        # after an incidental improvement of the scraping algorithm: we
        # want to be notified when that happens.
        if is_lyrics_content_ok(s['title'], res):
            log.debug('Source %s actually return valid lyrics!' % s['url'])
def test_default_ok(self):
    """Test default engines with the default query"""
    if not check_lyrics_fetched():
        self.skipTest("Run lyrics_download_samples.py script first.")
    fetchers = [
        lyrics.fetch_lyricswiki,
        lyrics.fetch_lyricscom,
        lyrics.fetch_musixmatch,
    ]
    # Each fetcher is paired positionally with a DEFAULT_SOURCES entry.
    for fetch, source in zip(fetchers, DEFAULT_SOURCES):
        page_url = source['url'] + source['path']
        if not os.path.isfile(url_to_filename(page_url)):
            # No local file for this URL: move on to the next engine.
            continue
        fetched = fetch(source['artist'], source['title'])
        self.assertTrue(lyrics.is_lyrics(fetched), page_url)
        content_ok = is_lyrics_content_ok(source['title'], fetched)
        self.assertTrue(content_ok, page_url)
def test_sources_ok(self):
    """Check that every source in ``self.sourcesOk`` scrapes to valid lyrics.

    The page URL is used as the failure message of both assertions.
    """
    for source in self.sourcesOk:
        page_url = source['url'] + source['path']
        scraped = lyrics.scrape_lyrics_from_url(page_url)
        self.assertTrue(lyrics.is_lyrics(scraped), page_url)
        content_ok = is_lyrics_content_ok(source['title'], scraped)
        self.assertTrue(content_ok, page_url)
def test_missing_lyrics(self):
    """Check that the ``missing_texts`` sample is not detected as lyrics."""
    sample = LYRICS_TEXTS['missing_texts']
    detected = lyrics.is_lyrics(sample)
    self.assertFalse(detected)
def test_missing_lyrics(self):
    """Check that the ``missing_texts`` sample is not detected as lyrics."""
    missing = LYRICS_TEXTS['missing_texts']
    is_valid = lyrics.is_lyrics(missing)
    self.assertFalse(is_valid)
def test_sources_ok(self):
    """Check that every source in ``self.sourcesOk`` scrapes to valid lyrics.

    The page is obtained with ``lyrics.fetch_url`` and scraped from its
    HTML; the page URL is used as the failure message of both assertions.
    """
    for source in self.sourcesOk:
        page_url = source['url'] + source['path']
        html = lyrics.fetch_url(page_url)
        scraped = lyrics.scrape_lyrics_from_html(html)
        self.assertTrue(lyrics.is_lyrics(scraped), page_url)
        content_ok = is_lyrics_content_ok(source['title'], scraped)
        self.assertTrue(content_ok, page_url)