async def extract_lyrics(cls, request: Request) -> Lyrics:
    """Scrape lyrics, title and artist from the page behind ``request``.

    A desktop Firefox user agent is set on the request before the page is
    fetched and parsed.

    Args:
        request: Request for the lyrics page; its ``headers`` are overwritten
            and its parsed document is read through ``request.bs``.

    Returns:
        A ``Lyrics`` object built from the title, the cleaned lyrics text and
        the artist.

    Raises:
        NoLyrics: If the centred content column or the class-less ``<div>``
            holding the lyrics cannot be found.
    """
    request.headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:59.0) Gecko/20100101 Firefox/59.0"
    }
    bs = await request.bs

    center = bs.body.find("div", {"class": "col-xs-12 col-lg-8 text-center"})
    if not center:
        raise NoLyrics

    # The lyrics live in the first <div> without a class attribute.
    lyrics_div = center.find("div", {"class": None})
    if lyrics_div is None:
        raise NoLyrics

    lyrics = lyrics_div.text
    # Clean up any markup that survived as literal text.
    lyrics = lyrics.replace("<br>", " ")
    lyrics = re.sub(r"<i?>\W*", "[", lyrics)
    lyrics = re.sub(r"\W*</i>", "]", lyrics)
    lyrics = lyrics.replace("&quot;", "\"")
    lyrics = lyrics.replace("</div>", "")
    lyrics = lyrics.strip()

    # Drops the first character and the last eight of the heading text
    # (presumably the surrounding quote and a `" lyrics` suffix -- confirm
    # against the live page).
    title = center.find("h1").text.strip()[1:-8]
    # Drops the last seven characters (presumably a " Lyrics" suffix).
    artist = bs.select_one("div.lyricsh h2 b").string[:-7]

    return Lyrics(title, lyrics, artist=artist)
async def test_can_handle(self):
    """Lyricsmode.can_handle must return True for a lyricsmode.com song URL."""
    url = "https://www.lyricsmode.com/lyrics/e/ed_sheeran/a_team_lyrics.html"
    async with ClientSession() as session:
        request = Request(session, url)
        handled = await Lyricsmode.can_handle(request)
        assert handled is True
async def test_translated_extraction(self):
    """Extract two Animelyrics pages and check lyrics, title and artist.

    The first page's lyrics are compared by SHA-256 digest, the second
    page's lyrics against the ``lyrics_text`` fixture.
    """
    splash_free_url = "http://www.animelyrics.com/anime/swimminganime/splashfree.htm"
    async with ClientSession() as session:
        request = Request(session, splash_free_url)
        lyrics = await Animelyrics.extract_lyrics(request)
        digest = hashlib.sha256(lyrics.lyrics.encode("utf-8")).hexdigest()
        assert digest == "f8b7b6fc53bdbc738a1b8beea09d086c6ca362c4c4857930488bc10153716dfd"
        assert lyrics.title == "SPLASH FREE"
        assert lyrics.artist == "STYLE FIVE"

    himitsu_url = "http://www.animelyrics.com/anime/kmb/fnknhnh.htm"
    async with ClientSession() as session:
        request = Request(session, himitsu_url)
        lyrics = await Animelyrics.extract_lyrics(request)
        assert lyrics.title == "Futari no Kimochi no Honto no Himitsu"
        assert lyrics.artist == "Yasuna"
        assert lyrics.lyrics == lyrics_text
async def test_can_handle(self):
    """AZLyrics.can_handle must return True for an azlyrics.com song URL."""
    url = "https://www.azlyrics.com/lyrics/edsheeran/theateam.html"
    async with ClientSession() as session:
        request = Request(session, url)
        handled = await AZLyrics.can_handle(request)
        assert handled is True
async def test_can_handle(self):
    """MusixMatch.can_handle must return True for a musixmatch.com song URL."""
    url = "https://www.musixmatch.com/lyrics/Dua-Lipa/New-Rules"
    async with ClientSession() as session:
        request = Request(session, url)
        handled = await MusixMatch.can_handle(request)
        assert handled is True
async def test_can_handle(self):
    """LyricalNonsense.can_handle must return True for a lyrical-nonsense.com URL."""
    url = "http://www.lyrical-nonsense.com/lyrics/radwimps/zen-zen-zense/"
    async with ClientSession() as session:
        request = Request(session, url)
        handled = await LyricalNonsense.can_handle(request)
        assert handled is True
async def test_untranslated_extraction(self):
    """Extract one Animelyrics page and check lyrics digest, title and artist."""
    url = "https://www.animelyrics.com/anime/accelworld/chasetheworld.htm"
    async with ClientSession() as session:
        request = Request(session, url)
        lyrics = await Animelyrics.extract_lyrics(request)
        # The lyrics are compared by SHA-256 digest rather than full text.
        encoded = lyrics.lyrics.encode("utf-8")
        digest = hashlib.sha256(encoded).hexdigest()
        assert digest == "3cf6a7bc4ef62cccca49e228ef556b58d459c68f4cb0dba7240cc4dffd6b3b20"
        assert lyrics.title == "Chase the world"
        assert lyrics.artist == "May'n"
async def test_extraction(self):
    """Extract a Genius page and check lyrics, title, artist and release date."""
    url = "https://genius.com/Ed-sheeran-the-a-team-lyrics"
    async with ClientSession() as session:
        request = Request(session, url)
        lyrics = await Genius.extract_lyrics(request)
        assert lyrics.lyrics == lyrics_text
        assert lyrics.title == "The A Team"
        assert lyrics.artist == "Ed Sheeran"
        assert lyrics.release_date == datetime(2011, 6, 12)
async def test_extraction(self):
    """Extract a Lyricsmode page and check title, artist and lyrics text."""
    url = "https://www.lyricsmode.com/lyrics/e/ed_sheeran/a_team_lyrics.html"
    async with ClientSession() as session:
        request = Request(session, url)
        lyrics = await Lyricsmode.extract_lyrics(request)
        assert lyrics.title == "A Team"
        assert lyrics.artist == "Ed Sheeran"
        assert lyrics.lyrics == lyrics_ed_sheeran_a_team
async def test_extraction(self):
    """Extract a MusixMatch page and check title, artist, release date and lyrics."""
    url = "https://www.musixmatch.com/lyrics/Dua-Lipa/New-Rules"
    async with ClientSession() as session:
        request = Request(session, url)
        lyrics = await MusixMatch.extract_lyrics(request)
        assert lyrics.title == "New Rules"
        assert lyrics.artist == "Dua Lipa"
        assert lyrics.release_date == datetime(2017, 6, 2)
        assert lyrics.lyrics == lyrics_dua_lipa_new_rules
async def test_extraction(self):
    """Extract an AZLyrics page and check lyrics digest, title and artist."""
    url = "https://www.azlyrics.com/lyrics/edsheeran/theateam.html"
    async with ClientSession() as session:
        request = Request(session, url)
        lyrics = await AZLyrics.extract_lyrics(request)
        # The lyrics are compared by SHA-256 digest rather than full text.
        encoded = lyrics.lyrics.encode("utf-8")
        digest = hashlib.sha256(encoded).hexdigest()
        assert digest == "e77a63fb93b1d0f373b859963532e41a2dbf2d68d290bf3f919b93b174fe26e3"
        assert lyrics.title == "The A Team"
        assert lyrics.artist == "Ed Sheeran"
async def test_extraction(self):
    """Extract two Lyrical Nonsense pages and check title, artist and lyrics.

    Each page is fetched with its own client session.
    """
    cases = (
        (
            "http://www.lyrical-nonsense.com/lyrics/radwimps/zen-zen-zense/",
            "Zenzenzense",
            "RADWIMPS",
            lyrics_radwimps_zenzenzense,
        ),
        (
            "https://www.lyrical-nonsense.com/lyrics/himouto-umaru-chan-r-theme-songs/umarun-taisou-sisters/",
            "Umarun Taisou",
            "SisterS",
            lyrics_sisters_umarun_taisou,
        ),
    )
    for url, expected_title, expected_artist, expected_lyrics in cases:
        async with ClientSession() as session:
            request = Request(session, url)
            lyrics = await LyricalNonsense.extract_lyrics(request)
            assert lyrics.title == expected_title
            assert lyrics.artist == expected_artist
            assert lyrics.lyrics == expected_lyrics
async def extract_lyrics(cls, request: Request) -> Lyrics:
    """Scrape lyrics, title and artist from the page behind ``request``.

    Two page layouts are handled. If the page contains a
    ``<table cellspacing="0" border="0">``, the lyrics are collected from
    the first cell of every table row. Otherwise the URL is rewritten to its
    ``.txt`` counterpart and the lyrics are taken from between two runs of
    ten or more dashes in that text.

    Args:
        request: Request for the lyrics page. In the plain-text case its
            ``url`` and ``resp_kwargs`` are modified.

    Returns:
        A ``Lyrics`` object with the title, the lyrics and the artist. The
        artist is ``None`` when no matching text node is found.

    Raises:
        NoLyrics: If the plain-text variant has no dash-delimited section.
    """
    bs = await request.bs

    # The title is the first child node of the <h1> that follows a <div>.
    title = next(bs.select_one("div ~ h1").children).string.strip()

    artist = bs.find(text=ARTIST_MATCHER)
    if artist:
        # find(text=<pattern>) locates the node with a regex *search*, so the
        # same operation is used to pull out the group. ``match`` would return
        # None, and crash on ``.group``, whenever the hit is not at the very
        # start of the node.
        artist_match = ARTIST_MATCHER.search(artist)
        artist = artist_match.group(1) if artist_match else None

    lyrics_window = bs.find("table", attrs={"cellspacing": "0", "border": "0"})
    if lyrics_window:
        # Table layout: this is the translated variant of the page.
        log.info("these lyrics have been translated... sighs...")

        parts = []
        for row in lyrics_window.find_all("tr"):
            cell = row.td
            if cell is None:
                continue
            span = cell.span
            if span is None:
                # Row without a lyrics span: nothing to collect.
                continue
            if span.dt is not None:
                # Remove the first <dt> so its text is not part of the lyrics.
                span.dt.replace_with("")
            for br in span.find_all("br"):
                br.replace_with("\n")
            parts.append(span.text)

        lyrics = "".join(parts).strip()
    else:
        # Plain layout: fetch the .txt version of the page without following
        # redirects.
        request.url = re.sub(r"\.html?", ".txt", request.url)
        request.resp_kwargs["allow_redirects"] = False

        content = (await request.text).strip()
        match = re.search(r"-{10,}(.+?)-{10,}", content, flags=re.DOTALL)
        if not match:
            raise NoLyrics
        lyrics = match.group(1).strip()

    # Normalise non-breaking spaces and drop carriage returns.
    lyrics = lyrics.replace("\xa0", " ").replace("\r", "")
    return Lyrics(title, lyrics, artist=artist)
async def extract_lyrics(cls, request: Request) -> Lyrics:
    """Scrape lyrics, title, artist and release date from ``request``'s page.

    A desktop Firefox user agent is set on the request before the response
    status is checked and the page is parsed.

    Args:
        request: Request for the lyrics page; its ``headers`` are overwritten.

    Returns:
        A ``Lyrics`` object with title, lyrics, artist and release date. The
        release date is ``None`` when the page has no date node or its text
        cannot be parsed with the ``"%b %d %Y"`` format.

    Raises:
        exceptions.NotAllowedError: If the response status check raises
            ``ClientResponseError``.
        exceptions.NoLyrics: If the page shows the empty-state marker or the
            nested ``mxm-lyrics`` containers (or their ``<span>``) are missing.
    """
    request.headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1"
    }

    try:
        (await request.resp).raise_for_status()
    except ClientResponseError as err:
        # Keep the HTTP error as the explicit cause for easier debugging.
        raise exceptions.NotAllowedError from err

    bs = await request.bs

    if bs.find_all("div", attrs={"class": "mxm-empty-state", "data-reactid": "87"}):
        raise exceptions.NoLyrics

    # The lyrics sit in an "mxm-lyrics" div nested inside another one.
    lyrics_frame = bs.find("div", {"class": "mxm-lyrics"})
    if lyrics_frame is None:
        raise exceptions.NoLyrics

    inner_frame = lyrics_frame.find("div", {"class": "mxm-lyrics"})
    if inner_frame is None:
        raise exceptions.NoLyrics

    lyrics_window = inner_frame.span
    if lyrics_window is None:
        raise exceptions.NoLyrics

    # Swap every <script> for two newlines before reading the text
    # (presumably so script contents do not end up in the lyrics).
    for garbage in bs.find_all("script"):
        garbage.replace_with(2 * "\n")

    lyrics = lyrics_window.text

    # The title is the last child node of the track-title heading.
    title_tag = bs.find("h1", attrs={"class": "mxm-track-title__track"})
    title = title_tag.contents[-1].strip()
    artist = bs.select_one("a.mxm-track-title__artist").string

    # The release date is optional metadata: a missing or oddly formatted
    # date must not make the whole extraction fail.
    release_date = None
    date_tag = bs.select_one("div.mxm-track-footer__album h3.mui-cell__subtitle")
    if date_tag is not None and date_tag.string:
        # ORDINAL_MATCHER is defined elsewhere; its first group is zero-padded
        # to two characters here (presumably the day number of an ordinal such
        # as "2nd" -- confirm against its definition).
        date_str = ORDINAL_MATCHER.sub(lambda m: m.group(1).zfill(2), date_tag.string)
        try:
            release_date = datetime.strptime(date_str, "%b %d %Y")
        except ValueError:
            release_date = None

    return Lyrics(title, lyrics, artist=artist, release_date=release_date)
async def test_can_handle(self):
    """Animelyrics.can_handle must return True for an animelyrics.com song URL."""
    url = "http://www.animelyrics.com/anime/swimminganime/splashfree.htm"
    async with ClientSession() as session:
        request = Request(session, url)
        handled = await Animelyrics.can_handle(request)
        assert handled is True
async def test_can_handle(self):
    """Genius.can_handle must return True for a genius.com lyrics URL."""
    url = "https://genius.com/Ed-sheeran-the-a-team-lyrics"
    async with ClientSession() as session:
        request = Request(session, url)
        handled = await Genius.can_handle(request)
        assert handled is True