def search_song(self, artist, title):
    """Fetch a song page from lyrster.com and build a ``Song`` from it.

    Returns ``None`` when the page cannot be downloaded, when the
    ``lyrics-info`` block is missing, or when the page only holds a
    placeholder instead of real lyrics.
    """
    stripped_chars = ["'", '!', '(', ')', '[', ']']
    title_part = self.prepare_url_parameter(title, to_delete=stripped_chars)
    artist_part = self.prepare_url_parameter(artist, to_delete=stripped_chars)
    link = "http://www.lyrster.com/lyrics/{}-lyrics-{}.html".format(title_part, artist_part)

    page = self.download_webpage(link)
    if not page:
        return None

    soup = self.prepare_soup(page)
    info_block = soup.find("div", {"id": "lyrics-info"})
    if info_block is None:
        # unknown song: the site sent us to its main page instead
        return None

    song_title = info_block.find("h1").text.replace(" Lyrics", "")
    song_artist = info_block.find("a").text
    lyric = self.parse_verse_block(soup.find("div", {"id": "lyrics"}))

    # stub pages carry one of these placeholder texts instead of lyrics
    is_placeholder = (
        lyric == "We do not have the complete song's lyrics just yet."
        or lyric.startswith('Shortcut to '))
    if is_placeholder:
        return None
    return Song(song_artist, song_title, self.sanitize_lyrics([lyric]))
def search_song(self, artist, title):
    """Fetch a song page from 1songlyrics.com and build a ``Song`` from it.

    :param artist: artist name; its first character is part of the URL
    :param title: song title
    :return: a ``Song``, or ``None`` when the artist is empty, the page
        cannot be downloaded or the site redirects to its main page
    """
    if not artist:
        # the URL needs the artist's first character; without it there is
        # nothing to look up (and artist[0] would raise IndexError)
        return None

    # no need to delete/replace chars - site will handle redirects
    link = 'http://www.1songlyrics.com/{}/{}/{}.html'.format(
        self.prepare_url_parameter(artist[0]),
        self.prepare_url_parameter(artist),
        self.prepare_url_parameter(title))
    page = self.download_webpage_text(link)
    if not page:
        return None

    soup = self.prepare_soup(page)
    if soup.html.title.text == '1 Song Lyrics':
        # song not found, redirected to main page
        return None

    main_pane = soup.find('div', {'id': 'wrapper'})
    headers = main_pane.findAll('h2', recursive=False)
    song_artist = headers[1].text
    # the title header ends with a 12-character suffix that is cut off
    song_title = headers[0].text[:-12]
    # lyrics live in the second <p> that has no class attribute
    lyrics_pane = soup.findAll('p', {'class': False})[1]
    lyrics = self.parse_verse_block(lyrics_pane)
    return Song(song_artist, song_title, self.sanitize_lyrics([lyrics]))
def search_song(self, artist, title):
    """Fetch a song page from nitrolyrics.com and build a ``Song`` from it.

    :param artist: artist name
    :param title: song title
    :return: a ``Song``, or ``None`` when the page cannot be downloaded or
        has no ``lyric`` block (song not found)
    """
    to_delete = ['.', '(', ')', "'", ',', '.', '?', '-']
    to_replace = [' ', ' & ']
    link = 'http://www.nitrolyrics.com/{}_{}-lyrics.html'.format(
        self.prepare_url_parameter(artist, to_delete, to_replace),
        self.prepare_url_parameter(title, to_delete, to_replace))
    page = self.download_webpage(link)
    if not page:
        return None

    soup = self.prepare_soup(page)
    # attrs is a mapping of attribute name -> value (was a set literal)
    head_pane = soup.find('div', {'class': 'lyric'})
    if head_pane is None:
        # empty page - song not found
        return None

    song_artist = head_pane.find('a').text
    song_title = head_pane.find('h1').text.replace(' Lyrics', '')
    lyric_pane = soup.find('div', {'class': 'lyricContent'})
    lyric = self.parse_verse_block(lyric_pane.find('p'))
    return Song(song_artist, song_title, self.sanitize_lyrics([lyric]))
def search_song(self, artist, title):
    """Search uta-net.com by title and return the first matching ``Song``.

    Each result row is compared against *artist* and *title* with
    ``compare_strings``; the first match whose song page has a
    ``kashi_area`` block is returned.

    :return: a ``Song``, or ``None`` when nothing could be downloaded, the
        result table is missing, or no row matches
    """
    link = 'https://www.uta-net.com/search/?Aselect=2&Keyword={}'.format(
        self.prepare_url_parameter(title, delimiter='+'))
    page = self.download_webpage(link)
    if not page:
        return None

    search_soup = self.prepare_soup(page)
    search_pane = search_soup.find('tbody')
    if search_pane is None:
        # no result table on the page - nothing to iterate over
        return None

    for item in search_pane.findAll('tr', recursive=False):
        tds = item.findAll('td', recursive=False)
        song_artist = tds[1].a.text
        song_title = tds[0].a.text
        # third '/'-separated piece of the href is used as the song id
        song_id = tds[0].a['href'].split('/')[2]
        if not (self.compare_strings(artist, song_artist)
                and self.compare_strings(title, song_title)):
            continue
        song_link = 'https://www.uta-net.com/song/{}/'.format(song_id)
        song_page = self.download_webpage_text(song_link)
        if not song_page:
            continue
        song_soup = self.prepare_soup(song_page)
        lyric_pane = song_soup.find('div', {'id': 'kashi_area'})
        if lyric_pane:
            lyric = self.parse_verse_block(lyric_pane)
            return Song(song_artist, song_title, self.sanitize_lyrics([lyric]))
    return None
def search_song(self, artist, title):
    """Find a song on lyrics.com through the artist's page.

    The artist page is scanned for links whose href contains
    ``lyric/<digits>``; the first link whose text matches *title* (per
    ``compare_strings``) and whose page downloads successfully is parsed.
    Returns ``None`` when nothing suitable is found.
    """
    artist_link = 'http://www.lyrics.com/artist/{}/'.format(
        self.prepare_url_parameter(artist))
    artist_page = self.download_webpage(artist_link)
    if not artist_page:
        return None

    artist_soup = self.prepare_soup(artist_page)
    artist_pane = artist_soup.find('p', {'class': 'artist'})
    if not artist_pane:
        # artist page not found, redirect to search page
        return None

    song_artist = artist_pane.a.text
    for anchor in artist_soup.findAll('a', href=re.compile('lyric/[0-9]+')):
        song_title = anchor.text
        if not self.compare_strings(title, song_title):
            continue
        song_page = self.download_webpage('http://www.lyrics.com' + anchor['href'])
        if not song_page:
            continue
        song_soup = self.prepare_soup(song_page)
        lyrics = self.parse_verse_block_custom(
            song_soup.find('pre', {'id': 'lyric-body-text'}))
        return Song(song_artist, song_title, self.sanitize_lyrics([lyrics]))
    return None
def search_song(self, artist, title):
    """Fetch a song page from metrolyrics.com and build a ``Song`` from it.

    The returned ``Song`` carries the *artist* and *title* passed in.
    Returns ``None`` when the page cannot be downloaded, has no
    ``lyrics-body-text`` block, or contains no verse text.
    """
    drop = ["'", '(', ')']
    swap = [' ']
    title_part = self.prepare_url_parameter(title, to_delete=drop, to_replace=swap)
    artist_part = self.prepare_url_parameter(artist, to_delete=drop, to_replace=swap)
    link = "http://www.metrolyrics.com/{}-lyrics-{}.html".format(title_part, artist_part)

    page = self.download_webpage(link)
    if not page:
        return None

    body = self.prepare_soup(page).find("div", {"id": "lyrics-body-text"})
    if body is None:
        # song not found
        return None

    # every verse is followed by a blank line
    lyric = ''.join(
        self.parse_verse_block(verse_pane) + '\n\n'
        for verse_pane in body.findAll("p", {"class": "verse"}))
    if not lyric.strip():
        # an empty result means the site only has a stub page for the song
        return None
    return Song(artist, title, self.sanitize_lyrics([lyric]))
def search_song(self, artist, title):
    """Search j-lyric.net and return the first listed result as a ``Song``.

    :param artist: artist name used in the search query
    :param title: song title used in the search query
    :return: a ``Song``, or ``None`` when the search page or the lyric page
        cannot be downloaded, or the result list block is missing
    """
    link = "http://search.j-lyric.net/index.php?kt={}&ct=0&ka={}&ca=0".format(
        self.prepare_url_parameter(title, delimiter='+'),
        self.prepare_url_parameter(artist))
    page = self.download_webpage(link)
    if not page:
        return None

    soup = self.prepare_soup(page)
    lyric_list_pane = soup.find('div', {'class': 'bdy'})
    if lyric_list_pane is None:
        return None

    artist_pane = lyric_list_pane.find('p', {'class': 'sml'}).find('a')
    song_artist = artist_pane.text
    title_pane = lyric_list_pane.find('p', {'class': 'mid'}).find('a')
    song_title = title_pane.text
    song_link = title_pane['href']

    lyric_page = self.download_webpage(song_link)
    if not lyric_page:
        # second download failed; do not hand an empty page to the parser
        return None
    lyric_soup = self.prepare_soup(lyric_page)
    lyric_pane = lyric_soup.find('p', {'id': 'Lyric'})
    lyric = self.parse_verse_block(lyric_pane)
    return Song(song_artist, song_title, self.sanitize_lyrics([lyric]))
def search_song(self, artist, title):
    """Fetch a song page from lyricsaction.com and build a ``Song`` from it.

    :param artist: artist name; its first character is part of the URL
    :param title: song title
    :return: a ``Song``, or ``None`` when the artist is empty, the page
        cannot be downloaded, the expected blocks are missing, or the
        header does not have the ``<artist> lyrics - <title>`` shape
    """
    if not artist:
        # artist[0] below would raise IndexError on an empty name
        return None

    # it seems that they can handle any symbol in link and redirect it to
    # the correct page, so there is no need to replace or delete anything
    link = 'http://www.lyricsaction.com/{}/{}/{}-lyrics.htm'.format(
        self.prepare_url_parameter(artist[0]),
        self.prepare_url_parameter(artist),
        self.prepare_url_parameter(title))
    page = self.download_webpage_text(link)
    if not page:
        return None

    soup = self.prepare_soup(page)
    # attrs is a mapping of attribute name -> value (was a set literal)
    main_pane = soup.find('div', {'class': 'node'})
    if main_pane is None:
        return None
    title_pane = main_pane.find('h2', recursive=False)
    if title_pane is None:
        return None

    title_parts = title_pane.text.split(' lyrics - ', 2)
    if len(title_parts) < 2:
        # song not found, redirected to main page
        return None
    song_artist = title_parts[0]
    song_title = title_parts[1]

    lyrics_pane = main_pane.find('div', {'class': 'entry'}, recursive=False).p
    lyrics = self.parse_verse_block(lyrics_pane)
    return Song(song_artist, song_title, self.sanitize_lyrics([lyrics]))
def search_song(self, artist, title):
    """Fetch a song page from lyrics.snakeroot.ru and build a ``Song``.

    :param artist: artist name; its upper-cased first character is part of
        the URL
    :param title: song title
    :return: a ``Song`` built from the last non-empty ``<p>`` of the content
        block, or ``None`` when the artist is empty, the page cannot be
        downloaded, or every paragraph is empty
    """
    if not artist:
        # artist[0] below would raise IndexError on an empty name
        return None

    to_delete = ['.', ',', '!', '?', '(', ')', '~', '/', "'", '"']
    to_replace = [' ']
    # the artist appears twice in the URL: as-is and lower-cased
    artist_part = self.prepare_url_parameter(
        artist, to_delete=to_delete, to_replace=to_replace, delimiter='_')
    title_part = self.prepare_url_parameter(
        title, to_delete=to_delete, to_replace=to_replace, delimiter='_')
    link = 'http://lyrics.snakeroot.ru/{}/{}/{}_{}.html'.format(
        self.prepare_url_parameter(artist[0].upper()),
        artist_part,
        artist_part.lower(),
        title_part.lower())

    # return 404 if song not found
    page = self.download_webpage_text(link)
    if not page:
        return None

    soup = self.prepare_soup(page)
    main_pane = soup.find('div', {'id': 'content'})
    title_pane = main_pane.find('h2', recursive=False)
    song_artist = title_pane.a.text
    # the text after the artist link starts with a 3-character separator
    song_title = title_pane.a.next_sibling[3:]

    # walk the paragraphs backwards and take the first non-empty one,
    # i.e. the last non-empty paragraph of the page
    for lyrics_pane in reversed(main_pane.findAll('p')):
        if lyrics_pane.text.strip() != '':
            lyrics = self.parse_verse_block(lyrics_pane)
            return Song(song_artist, song_title, self.sanitize_lyrics([lyrics]))
    return None
def search_song(self, artist, title):
    """Fetch a song page from lyricsreg.com and build a ``Song`` from it.

    ``' & '`` in *artist* and *title* is replaced by ``' and '`` before the
    URL is built.

    :return: a ``Song``, or ``None`` when the page cannot be downloaded or
        does not contain the expected content/header blocks
    """
    to_delete = ['(', ')', "'", ',']
    link = 'http://www.lyricsreg.com/lyrics/{}/{}/'.format(
        self.prepare_url_parameter(artist.replace(' & ', ' and '), to_delete=to_delete),
        self.prepare_url_parameter(title.replace(' & ', ' and '), to_delete=to_delete))
    page = self.download_webpage(link)
    if not page:
        return None

    soup = self.prepare_soup(page)
    main_pane = soup.find('section', {'id': 'maincontent'})
    if main_pane is None:
        return None
    header = main_pane.find('h2', {'class': 'content-subhead'})
    if header is None:
        return None

    artist_title = header.text.split(' lyrics : ')
    if len(artist_title) < 2:
        # header is not "<artist> lyrics : <title>" - not a song page
        return None
    song_artist = artist_title[0]
    # the title is wrapped in one extra character on each side
    song_title = artist_title[1][1:-1]

    lyric_pane = main_pane.find('div', {'style': 'text-align:center'}, recursive=False)
    lyric = self.parse_verse_block(lyric_pane)
    return Song(song_artist, song_title, self.sanitize_lyrics([lyric]))
def search_song(self, artist, title):
    """Fetch a song page from lyricshuddle.com and build a ``Song`` from it.

    :param artist: artist name; its first character is part of the URL
    :param title: song title
    :return: a ``Song``, or ``None`` when the artist is empty, the page
        cannot be downloaded, or the site reports that it has no lyrics
    """
    if not artist:
        # artist[0] below would raise IndexError on an empty name
        return None

    to_delete = ['!', '?', '(', ')', "'", '"', ',']
    to_replace = [' ', ' & ', ' / ', '/', ':']
    link = "http://www.lyricshuddle.com/{}/{}/{}.html".format(
        self.prepare_url_parameter(artist[0]),
        self.prepare_url_parameter(artist, to_delete, to_replace),
        self.prepare_url_parameter(title, to_delete, to_replace))
    page = self.download_webpage(link)
    if not page:
        return None

    soup = self.prepare_soup(page)
    if soup.text == 'Impossible to find lyrics.':
        return None

    head_pane = soup.find("div", {"class": "location"})
    head_pane_parts = head_pane.findAll("a")
    # third and fourth links of the location bar hold artist and title
    song_artist = head_pane_parts[2].text
    song_title = head_pane_parts[3].text

    main_pane = soup.find("div", {"class": "lyricstext"})
    self.remove_tags_from_block(main_pane, ['div', 'style'])
    lyric = self.parse_verse_block(main_pane)
    return Song(song_artist, song_title, self.sanitize_lyrics([lyric]))
def search_song(self, artist, title):
    """Fetch a song page from lyricsmania.com and build a ``Song`` from it.

    The lyrics are assembled from two parts of the page, joined by a blank
    line.  Returns ``None`` when the page cannot be downloaded or has no
    ``lyrics-body`` block.
    """
    drop = ['.', "'", '?', '(', ')']
    swap = [' ', ' & ']
    title_part = self.prepare_url_parameter(
        title, to_delete=drop, to_replace=swap, delimiter='_')
    artist_part = self.prepare_url_parameter(
        artist, to_delete=drop, to_replace=swap, delimiter='_')
    link = 'http://www.lyricsmania.com/{}_lyrics_{}.html'.format(title_part, artist_part)

    page = self.download_webpage(link)
    if not page:
        return None

    soup = self.prepare_soup(page)
    body_pane = soup.find('div', {'class': 'lyrics-body'})
    if body_pane is None:
        # song wasn't found: we were redirected to the main page, which has
        # no lyric pane
        return None
    quotable_pane = body_pane.find('div', {'class': 'fb-quotable'})

    # artist and title come from the navigation header
    header_pane = soup.find('div', {'class': 'lyrics-nav'})
    song_title = re.sub(' lyrics$', '', header_pane.find('h2').text)
    song_artist = header_pane.find('h3').text

    # first part of lyrics, to the left of the video block
    first_part = self.parse_verse_block(quotable_pane)
    # second part of lyrics, below the video block
    premium_pane = quotable_pane.find('div', {'class': 'p402_premium'})
    second_part = self.parse_verse_block(premium_pane)

    return Song(song_artist, song_title,
                self.sanitize_lyrics([first_part + '\n\n' + second_part]))
def search_song(self, artist, title):
    """Fetch a song page from 1musiclyrics.net and build a ``Song`` from it.

    :param artist: artist name; its first character is part of the URL
    :param title: song title
    :return: a ``Song``, or ``None`` when the artist is empty, the page
        cannot be downloaded, or the site redirects to its main page
    """
    if not artist:
        # artist[0] below would raise IndexError on an empty name
        return None

    to_replace = [
        '.', ',', "'", '!', '?', '&', '(', ')', '-', '>', '/', ' '
    ]
    link = 'http://www.1musiclyrics.net/{}/{}/{}.html'.format(
        self.prepare_url_parameter(artist[0]),
        self.prepare_url_parameter(artist, to_replace=to_replace, delimiter='_'),
        self.prepare_url_parameter(title, to_replace=to_replace, delimiter='_'))

    # return 404 if song not found
    page = self.download_webpage_text(link)
    if not page:
        return None

    soup = self.prepare_soup(page)
    if soup.html.title.text == '1 Music Lyrics':
        # song not found, redirected to main page
        return None

    main_pane = soup.find('div', {'id': 'welcomeBox'})
    title_parts = main_pane.findAll('h1', {'id': 'welcomeText'}, recursive=False)
    song_artist = title_parts[1].text
    # the title header ends with a 13-character suffix that is cut off
    song_title = title_parts[0].text[:-13]
    lyrics_pane = main_pane.find('p', {'class': False}, recursive=False)
    lyrics = self.parse_verse_block(lyrics_pane)
    return Song(song_artist, song_title, self.sanitize_lyrics([lyrics]))
def search_song(self, artist, title):
    """Fetch a song page from musixmatch.com and build a ``Song`` from it.

    Lyrics are the text of every ``mxm-lyrics__content`` paragraph, each
    followed by a newline.  Returns ``None`` when the page cannot be
    downloaded.
    """
    drop = ['!', '"', '(', ')']
    swap = [' ', '.', "'", ' + ']
    artist_part = self.prepare_url_parameter(artist, to_delete=drop, to_replace=swap)
    title_part = self.prepare_url_parameter(title, to_delete=drop, to_replace=swap)
    link = 'https://www.musixmatch.com/lyrics/{}/{}'.format(artist_part, title_part)

    # return 404 if song not found
    page = self.download_webpage_text(link)
    if not page:
        return None

    soup = self.prepare_soup(page)
    title_pane = soup.find('div', {'class': 'mxm-track-title'})
    song_artist = title_pane.find('a').text

    heading = title_pane.find('h1', recursive=False)
    # strip <small> children from the heading before reading the title
    self.remove_tags_from_block(heading, ['small'])
    song_title = heading.text

    lyrics_root = soup.find('div', {'class': 'mxm-lyrics'})
    paragraphs = lyrics_root.findAll('p', {'class': 'mxm-lyrics__content'})
    lyrics = ''.join(paragraph.text + '\n' for paragraph in paragraphs)
    return Song(song_artist, song_title, self.sanitize_lyrics([lyrics]))
def get_song(self, page):
    """Parse a downloaded song page into a ``Song``.

    Artist and title are read from one of two page layouts: a table-based
    one (text used as-is) or a div-based one (text stripped).  Every
    ``div`` with an id inside the ``langtabs`` block becomes one lyric
    entry, except ids starting with ``Details`` or ending with ``-tv``.
    """
    soup = self.prepare_soup(page)

    artist_table = soup.find('table', {'class': 'imagetabletitle'})
    if artist_table:
        song_artist = artist_table.find('div', {'class': 'titletextpage'}).get_text()
    else:
        artist_div = soup.find('div', {'class': 'artistlyricblocknew'})
        song_artist = artist_div.find('div', {'class': 'artisttext1new'}).get_text().strip()

    title_table = soup.find('table', {'class': 'songtitle'})
    if title_table:
        song_title = title_table.find('div', {'class': 'titletextpage'}).get_text()
    else:
        title_div = soup.find('div', {'class': 'titlelyricblocknew'})
        song_title = title_div.find('div', {'class': 'titletext1new'}).get_text().strip()

    tabs = soup.find('div', {'id': 'langtabs'})
    # list order is reversed to move the original japanese lyric to the top;
    # 'Details' (song info) and '*-tv' (tv-size versions) blocks are skipped
    lyrics = [
        self.parse_verse_block(tab)
        for tab in tabs.findAll('div', id=True)[::-1]
        if not tab['id'].startswith('Details') and not tab['id'].endswith('-tv')
    ]
    return Song(song_artist, song_title, self.sanitize_lyrics(lyrics))
def search_song(self, artist, title):
    """Search kget.jp and return the first listed result as a ``Song``.

    :param artist: artist name used in the search query
    :param title: song title used in the search query
    :return: a ``Song``, or ``None`` when the search page or the lyric page
        cannot be downloaded, or the search result list is empty
    """
    link = 'http://www.kget.jp/search/index.php?r={}&t={}'.format(
        self.prepare_url_parameter(artist, delimiter='+'),
        self.prepare_url_parameter(title, delimiter='+'))
    page = self.download_webpage(link)
    if not page:
        return None

    soup = self.prepare_soup(page)
    lyric_list_pane = soup.find('div', {'class': 'title-wrap cf'})
    if lyric_list_pane is None:
        # empty search result list
        return None

    artist_pane = lyric_list_pane.find('p', {'class': 'artist'}).find('a')
    song_artist = artist_pane.text
    title_pane = lyric_list_pane.find('a', {'class': 'lyric-anchor'})
    song_title = title_pane.find('h2', {'class': 'title'}).text
    song_link = 'http://www.kget.jp' + title_pane['href']

    lyric_page = self.download_webpage(song_link)
    if not lyric_page:
        # second download failed; do not hand an empty page to the parser
        return None
    lyric_soup = self.prepare_soup(lyric_page)
    lyric_pane = lyric_soup.find('div', {'id': 'lyric-trunk'})
    lyric = self.parse_verse_block(lyric_pane)
    return Song(song_artist, song_title, self.sanitize_lyrics([lyric]))
def search_song(self, artist, title):
    """Search songs-lyrics.net and return the first matching ``Song``.

    Each result cell is compared against *artist* and *title* with
    ``compare_strings``.  A two-second pause precedes the song-page request.

    :return: a ``Song``, or ``None`` when the search page cannot be
        downloaded, has no result table, or no result matches
    """
    link = 'http://www.songs-lyrics.net/lyrics-search.php?ar={}&so={}&submit=Search'.format(
        self.prepare_url_parameter(artist),
        self.prepare_url_parameter(title))
    page = self.download_webpage(link)
    if not page:
        return None

    soup = self.prepare_soup(page)
    results_pane = soup.find('table', {'class': 'table2'})
    if results_pane is None:
        # no result table on the page
        return None

    for item in results_pane.findAll('td', {'class': 'text'}):
        tags = item.findAll('a', recursive=False)
        if len(tags) < 3:
            # artist is read from the first link and the song from the third
            continue
        song_artist = tags[0].text
        # the song link text ends with a 7-character suffix that is cut off
        song_title = tags[2].text[:-7]
        song_link = 'http://www.songs-lyrics.net' + tags[2]['href']
        if not (self.compare_strings(artist, song_artist)
                and self.compare_strings(title, song_title)):
            continue

        # can't send 2 consecutive requests - the site answers with a
        # "too many requests from your ip" error
        time.sleep(2)
        song_page = self.download_webpage_text(song_link)
        if not song_page:
            continue
        song_soup = self.prepare_soup(song_page)
        main_pane = song_soup.find('div', {'class': 'row panels'})
        lyric_pane = main_pane.findAll('div', recursive=False)[1]
        lyric = self.parse_verse_block(
            lyric_pane, tags_to_skip=['div', 'table', 'h5'])
        return Song(song_artist, song_title, self.sanitize_lyrics([lyric]))
    return None
def search_song(self, artist, title):
    """Fetch a song page from alphabetlyrics.com and build a ``Song``.

    Lyrics are the stripped text of each direct ``div``/``br`` child of the
    second ``lyrics`` block, one per line.  Returns ``None`` when the page
    cannot be downloaded.
    """
    drop = ['.', ',', "'", '?', '/', '(', ')', '!']
    swap = [' ']
    artist_part = self.prepare_url_parameter(
        artist, to_delete=drop, to_replace=swap, delimiter='_')
    title_part = self.prepare_url_parameter(
        title, to_delete=drop, to_replace=swap, delimiter='_')
    link = 'http://alphabetlyrics.com/lyrics/{}/{}.html'.format(artist_part, title_part)

    # return 404 if song not found
    page = self.download_webpage_text(link)
    if not page:
        return None

    soup = self.prepare_soup(page)
    nav_bar = soup.find('div', {'class': 'songlist bglyric2'})
    song_artist = nav_bar.findAll('a', recursive=False)[1].text
    song_title = nav_bar.find('b', recursive=False).text

    lyrics_pane = soup.findAll('div', {'class': 'lyrics'})[1]
    children = lyrics_pane.findAll(['div', 'br'], recursive=False)
    lyrics = ''.join(child.text.strip() + '\n' for child in children)
    return Song(song_artist, song_title, self.sanitize_lyrics([lyrics]))
def search_song(self, artist, title):
    """Fetch a song page from utaten.com and build a ``Song`` from it.

    Returns ``None`` when the page cannot be downloaded or has no lyric
    title block.
    """
    link = 'http://utaten.com/lyric/{}/{}/'.format(
        self.prepare_url_parameter(artist),
        self.prepare_url_parameter(title))
    page = self.download_webpage_text(link)
    if not page:
        return None

    main_pane = self.prepare_soup(page).find('main')
    title_pane = main_pane.find(
        'div', {'class': 'contentBox__title contentBox__title--lyricTitle'})
    if not title_pane:
        # song not found, redirected to main page
        return None

    heading_nodes = title_pane.h1.contents
    # first node is the title text with one extra character on each side
    song_title = heading_nodes[0].strip()[1:-1]
    # fourth node holds the artist name
    song_artist = heading_nodes[3].text.strip()

    body_pane = main_pane.find('div', {'class': 'lyricBody'})
    lyric = self.parse_verse_block(body_pane.find('div', {'class': 'medium'}))
    return Song(song_artist, song_title, self.sanitize_lyrics([lyric]))
def search_song(self, artist, title):
    """Fetch a song page from songlyrics.com and build a ``Song`` from it.

    *artist* and *title* are lower-cased before the URL is built.  Returns
    ``None`` when the page cannot be downloaded.
    """
    drop = [',', '(', ')', '&', '"', '?']
    swap = [' ', "'", '.']
    artist_part = self.prepare_url_parameter(
        artist.lower(), to_delete=drop, to_replace=swap, delimiter='-')
    title_part = self.prepare_url_parameter(
        title.lower(), to_delete=drop, to_replace=swap, delimiter='-')
    link = 'http://www.songlyrics.com/{}/{}-lyrics/'.format(artist_part, title_part)

    page = self.download_webpage(link)
    if not page:
        return None

    soup = self.prepare_soup(page)
    title_pane = soup.find('div', {'class': 'pagetitle'})
    song_artist = title_pane.find('p', recursive=False).a.text
    heading_text = title_pane.find('h1', recursive=False).text
    # the heading looks like "<something> - <title> Lyrics"
    song_title = re.findall('.*? - (.*?) Lyrics', heading_text)[0]

    # the lyrics pane may contain a hidden image, which would leave a
    # double newline in its place unless it is skipped
    lyric = self.parse_verse_block(
        soup.find('p', {'id': 'songLyricsDiv'}), tags_to_skip=['img'])
    return Song(song_artist, song_title, self.sanitize_lyrics([lyric]))
def search_song(self, artist, title):
    """Search kashinavi.com by title and return the first matching ``Song``.

    Pages are requested with the ``shift_jis`` encoding argument and the
    query is quoted with ``cp932``.  Each result link is compared against
    *artist* and *title* with ``compare_strings``.

    :return: a ``Song``, or ``None`` when nothing could be downloaded or no
        result matches
    """
    search_link = 'http://kashinavi.com/search.php?r=kyoku&search={}'.format(
        self.prepare_url_parameter(title, delimiter='+', quote_encoding='cp932'))
    page = self.download_webpage_text(search_link, 'shift_jis')
    if not page:
        return None

    soup = self.prepare_soup(page)
    # raw string: '\?' and '\d' are invalid escapes in a normal literal
    anchors = soup.findAll('a', href=re.compile(r'song_view.html\?\d?'), text=True)
    for anchor in anchors:
        song_artist = anchor.parent.find_next_sibling('td').text
        song_title = anchor.text
        song_link = anchor['href']
        if not (self.compare_strings(artist, song_artist)
                and self.compare_strings(title, song_title)):
            continue
        # the song id is whatever follows the first '?' of the href
        song_id = song_link.split('?', 2)[1]
        lyric_link = 'http://kashinavi.com/s/kashi.php?no={}'.format(song_id)
        lyric_page = self.download_webpage_text(lyric_link, 'shift_jis')
        if lyric_page:
            # keep what lies between the first '>' and the last '<'
            lyrics = lyric_page[lyric_page.index('>') + 1:lyric_page.rfind('<')].replace('<br>', '\n')
            return Song(song_artist, song_title, self.sanitize_lyrics([lyrics]))
    return None
def search_song(self, artist, title):
    """Fetch a song page from absolutelyrics.com and build a ``Song``.

    :param artist: artist name
    :param title: song title
    :return: a ``Song``, or ``None`` when the page cannot be downloaded,
        has no ``nav`` block, or the ``nav`` block has fewer than two
        direct links (redirect to the search page)
    """
    to_delete = ['?']
    link = 'http://www.absolutelyrics.com/lyrics/view/{}/{}'.format(
        self.prepare_url_parameter(artist, to_delete=to_delete, delimiter='_'),
        self.prepare_url_parameter(title, to_delete=to_delete, delimiter='_'))
    page = self.download_webpage_text(link)
    if not page:
        return None

    soup = self.prepare_soup(page)
    nav_pane = soup.find('div', {'id': 'nav'})
    if not nav_pane:
        # song not found, we're redirected to search page which will not
        # work without javascript
        return None

    nav_a_panes = nav_pane.findAll('a', recursive=False)
    if len(nav_a_panes) < 2:
        # song not found, we're redirected to search page; the second link
        # read below does not exist there
        # todo: process search suggestions
        return None

    song_artist = nav_a_panes[1].text.replace(' Lyrics', '')
    # the title is the text after the second link, minus a 2-char separator
    song_title = nav_a_panes[1].next_sibling.strip().replace(' Lyrics', '')[2:]
    lyrics_pane = soup.find('p', {'id': 'view_lyrics'})
    lyrics = self.parse_verse_block(lyrics_pane)
    return Song(song_artist, song_title, self.sanitize_lyrics([lyrics]))
def search_song(self, artist, title):
    """Fetch a song page from genius.com and build a ``Song`` from it.

    *artist* and *title* are lower-cased before the URL is built.  Returns
    ``None`` when the page cannot be downloaded.
    """
    drop = ["'", '(', ')', '.', '?']
    swap = [' ']
    artist_part = self.prepare_url_parameter(
        artist.lower(), to_delete=drop, to_replace=swap)
    title_part = self.prepare_url_parameter(
        title.lower(), to_delete=drop, to_replace=swap)
    link = 'https://genius.com/{}-{}-lyrics'.format(artist_part, title_part)

    page = self.download_webpage(link)
    if not page:
        return None

    soup = self.prepare_soup(page)
    song_artist = soup.find(
        'a', {'class': 'header_with_cover_art-primary_info-primary_artist'}
    ).text.strip()
    song_title = soup.find(
        'h1', {'class': 'header_with_cover_art-primary_info-title'}
    ).text.strip()

    verse_root = soup.find('div', {'class': 'lyrics'}).find('p')
    lyric = self.parse_verse_block(verse_root)
    return Song(song_artist, song_title, self.sanitize_lyrics([lyric]))
def search_song(self, artist, title):
    """Find a song through a Google custom-search query and parse its page.

    Only the first search result is considered, and only when its title
    matches ``'<title> <artist> - 歌詞タイム'`` per ``compare_strings``.

    :return: a ``Song``, or ``None`` when a download fails, the response
        has no results, the first result does not match, or no lyrics can
        be extracted from the page
    """
    # NOTE(review): API key, signature and engine id are hard-coded here;
    # consider moving them to configuration
    link = 'https://www.googleapis.com/customsearch/v1element?key={}&rsz=filtered_cse&num=1&hl=ja&prettyPrint=true&source=gcsc&gss=.com&sig={}&cx={}&q={}%20{}'.format(
        'AIzaSyCVAXiUzRYsML1Pv6RwSG1gunmMikTzQqY',
        'af0b52154899cfe2ecd2e1ec788a43aa',
        '005893883342704476181:qa28d4ywjdg',
        self.prepare_url_parameter(artist),
        self.prepare_url_parameter(title))
    page = self.download_webpage_json(link)
    if not page:
        return None

    # a response without a 'results' key is treated like an empty result
    results = page.get('results')
    if not results:
        return None
    expected_title = '{} {} - 歌詞タイム'.format(title, artist)
    if not self.compare_strings(results[0]['titleNoFormatting'], expected_title):
        return None

    lyric_link = results[0]['unescapedUrl']
    lyric_page = self.download_webpage_text(lyric_link)
    if not lyric_page:
        return None

    soup = self.prepare_soup(lyric_page)
    title_pane = soup.find('div', {'class': 'person_list_and_other_contents'})
    song_title = title_pane.h1.text.strip()
    person_pane = title_pane.find('div', {'class': 'person_list'})
    song_artist = person_pane.find('a').text.strip()
    lyric = self.get_lyrics(lyric_page)
    if not lyric:
        return None
    return Song(song_artist, song_title, self.sanitize_lyrics([lyric]))
def search_song(self, artist, title):
    """Look a song up through the lyrics.wikia.com API and parse its page.

    Lyrics from the main page are extended with lyrics from linked
    sub-pages (translations) whose href starts with the page's own name.

    :return: a ``Song``, or ``None`` when the API call fails, the API
        answers ``'Not found'``, or the lyric page cannot be downloaded
    """
    # they don't understand quoted '&' as a delimiter between artists
    url = 'http://lyrics.wikia.com/api.php?action=lyrics&artist={}&song={}&fmt=realjson&func=getSong'.format(
        self.prepare_url_parameter(artist, safe_chars='&'),
        self.prepare_url_parameter(title))
    page = self.download_webpage_text(url)
    if not page:
        return None

    resp = json.loads(page)
    if resp['lyrics'] == 'Not found':
        return None
    song_artist = resp['artist']
    song_title = resp['song']
    lyric_url = resp['url']

    lyric_page = self.download_webpage(lyric_url)
    if not lyric_page:
        # do not hand an empty page to the parser
        return None
    soup = self.prepare_soup(lyric_page)
    lyrics = self.get_page_lyrics(soup)

    # load lyric translations from separate pages
    # eg: This song has been translated into these languages: Romanized, English.
    # the page name is the piece after the first single '/' of the url
    page_name = re.split('(?<!/)/(?!/)', lyric_url, 2)[1]
    # escape the name: characters such as '(' or '?' must match literally
    translation_href = re.compile('/wiki/' + re.escape(page_name) + '/.*')
    for anchor in soup.findAll('a', {'title': True}, href=translation_href):
        translation_page = self.download_webpage_text(
            'http://lyrics.wikia.com' + anchor['href'])
        if not translation_page:
            continue
        lyrics.extend(self.get_page_lyrics(self.prepare_soup(translation_page)))
    return Song(song_artist, song_title, self.sanitize_lyrics(lyrics))
def search_song(self, artist, title):
    """Fetch a song page from showmelyrics.com and build a ``Song`` from it.

    Lyrics are every direct ``<p>`` of the editable lyrics block, each
    followed by a blank line.  Returns ``None`` when the page cannot be
    downloaded.
    """
    drop = ["'", '’', '.', ',', '!', '?', '[', ']', '(', ')', '*']
    swap = [' ', ' & ']
    artist_part = self.prepare_url_parameter(artist, to_delete=drop, to_replace=swap)
    title_part = self.prepare_url_parameter(title, to_delete=drop, to_replace=swap)
    link = 'http://showmelyrics.com/lyrics/{}-{}'.format(artist_part, title_part)

    # return 404 if song not found
    page = self.download_webpage_text(link)
    if not page:
        return None

    soup = self.prepare_soup(page)
    heading = soup.find('div', {'class': 'box-title-current'}).span.text.strip()
    # heading is split on ' – ' into artist and title
    heading_parts = heading.split(' – ', 2)
    song_artist = heading_parts[0]
    song_title = heading_parts[1]

    editable_pane = soup.find('div', {'id': 'lyrics'}).find(
        'div', {'class': 'editable editable-content'}, recursive=False)
    lyrics = ''.join(
        self.parse_verse_block(paragraph) + '\n\n'
        for paragraph in editable_pane.findAll('p', recursive=False))
    return Song(song_artist, song_title, self.sanitize_lyrics([lyrics]))
def search_song(self, artist, title):
    """Fetch a song page from lyricsplace.com and build a ``Song`` from it.

    :param artist: artist name; its first character is part of the URL
    :param title: song title
    :return: a ``Song``, or ``None`` when the artist is empty, the page
        cannot be downloaded, or the page has no breadcrumbs list
    """
    if not artist:
        # artist[0] below would raise IndexError on an empty name
        return None

    to_delete = ["'", '.', ',', '!', '?', '(', ')']
    to_replace = [' & ', ' / ', ' ']
    link = 'http://lyricsplace.com/songs/{}/{}/{}.html'.format(
        self.prepare_url_parameter(artist[0]),
        self.prepare_url_parameter(artist, to_delete=to_delete, to_replace=to_replace),
        self.prepare_url_parameter(title, to_delete=to_delete, to_replace=to_replace))

    # return 404 if song not found
    page = self.download_webpage_text(link)
    if not page:
        return None

    soup = self.prepare_soup(page)
    nav_pane = soup.find('ul', {'class': 'breadcrumbs'})
    if not nav_pane:
        # song not found (not all of them return 404)
        return None

    breadcrumbs = nav_pane.findAll('li', recursive=False)
    # both crumbs end with a fixed-length suffix (7 / 12 chars) that is cut
    song_artist = breadcrumbs[2].a.text[:-7]
    song_title = breadcrumbs[3].span.text[:-12]
    lyrics_pane = soup.find('div', {'class': 'twelve columns lyric'})
    lyrics = self.parse_verse_block(lyrics_pane, tags_to_skip=['h1'])
    return Song(song_artist, song_title, self.sanitize_lyrics([lyrics]))
def search_song(self, artist, title):
    """Search lyrsense.com by title and build a ``Song`` from the match.

    ``parse_search_page`` picks the result; its return value is indexed as
    (song link, artist, title).  Every ``<p>`` whose id matches
    ``.{2}_text`` becomes one lyric entry.  Returns ``None`` when a
    download fails or no result is picked.
    """
    search_link = 'https://lyrsense.com/search?s={}'.format(
        self.prepare_url_parameter(title))
    search_page = self.download_webpage(search_link)
    if not search_page:
        return None

    song_info = self.parse_search_page(self.prepare_soup(search_page), artist, title)
    if not song_info:
        return None

    song_page = self.download_webpage_text(song_info[0])
    if not song_page:
        return None

    song_soup = self.prepare_soup(song_page)
    lyrics = [
        self.parse_verse_block(block)
        for block in song_soup.findAll('p', id=re.compile('.{2}_text'))
    ]
    return Song(song_info[1], song_info[2], self.sanitize_lyrics(lyrics))
def search_song(self, artist, title):
    """Search darklyrics.com and return the first matching ``Song``.

    Result links are compared against ``'<artist> - <title>'`` with
    ``compare_strings``.  The fragment after ``#`` in the link is passed to
    ``find_verse_block`` as the song id.

    :return: a ``Song``, or ``None`` when a download fails, the result
        block is missing, or no result matches
    """
    link = 'http://www.darklyrics.com/search?q={}%20{}'.format(
        self.prepare_url_parameter(artist),
        self.prepare_url_parameter(title))
    page = self.download_webpage(link)
    if not page:
        return None

    soup = self.prepare_soup(page)
    main_pane = soup.find('div', {'class': 'cont'})
    if main_pane is None:
        # no result block on the page
        return None

    full_title = '{} - {}'.format(artist, title)
    song_links = main_pane.findAll(
        'a', {'target': '_blank'}, href=re.compile('lyrics/.*/.*html#[0-9]+'))
    for item in song_links:
        if not self.compare_strings(item.text, full_title):
            continue
        mobj = re.match('lyrics/.*/.*html#(?P<id>.*)', item['href'])
        if mobj is None:
            # href does not start with 'lyrics/'; no song id to extract
            continue
        song_id = mobj.group('id')
        song_link = 'http://www.darklyrics.com/' + item['href']
        # split on the first ' - ' only, so a dash inside the title cannot
        # break the two-way unpacking
        song_artist, _, song_title = item.text.partition(' - ')
        song_page = self.download_webpage_text(song_link)
        if song_page:
            song_soup = self.prepare_soup(song_page)
            lyrics_pane = song_soup.find('div', {'class': 'lyrics'})
            lyric = self.find_verse_block(lyrics_pane, song_id)
            return Song(song_artist, song_title, self.sanitize_lyrics([lyric]))
    return None
def search_song(self, artist, title):
    """Fetch a song page from allyrics.net and build a ``Song`` from it.

    :param artist: artist name
    :param title: song title
    :return: a ``Song``, or ``None`` when the page cannot be downloaded,
        the navigation bar is missing or too short, or the title read from
        it is empty
    """
    to_delete = ['!', '?', '.', ',', '(', ')']
    to_replace = [' ', "'", ' & ']
    link = 'http://www.allyrics.net/{}/lyrics/{}/'.format(
        self.prepare_url_parameter(artist, to_delete=to_delete, to_replace=to_replace),
        self.prepare_url_parameter(title, to_delete=to_delete, to_replace=to_replace))
    page = self.download_webpage_text(link)
    if not page:
        return None

    soup = self.prepare_soup(page)
    # attrs is a mapping of attribute name -> value (was a set literal)
    nav_bar = soup.find('div', {'class': 'sh_nav'})
    if nav_bar is None:
        return None
    nav_bar_parts = nav_bar.findAll('a', recursive=False)
    if len(nav_bar_parts) < 4:
        # artist and title are read from the third and fourth links
        return None

    # both link texts end with a 7-character suffix that is cut off
    song_artist = nav_bar_parts[2].text[:-7]
    song_title = nav_bar_parts[3].text[:-7].strip()
    if not song_title:
        # song not found
        return None

    lyrics_pane = soup.find('div', {'class': 'c_tl'})
    lyrics = self.parse_verse_block(lyrics_pane, tags_to_skip=['script'])
    return Song(song_artist, song_title, self.sanitize_lyrics([lyrics]))