예제 #1
0
    def search_song(self, artist, title):
        """Look up a song on lyrster.com through its direct lyrics URL.

        Returns a ``Song`` built from the artist, title and lyrics scraped
        from the page, or ``None`` when nothing was downloaded, the page has
        no ``lyrics-info`` block, or the page is only a stub without lyrics.
        """
        stripped_chars = ["'", '!', '(', ')', '[', ']']
        url_title = self.prepare_url_parameter(title, to_delete=stripped_chars)
        url_artist = self.prepare_url_parameter(artist,
                                                to_delete=stripped_chars)
        link = "http://www.lyrster.com/lyrics/{}-lyrics-{}.html".format(
            url_title, url_artist)

        page = self.download_webpage(link)
        if not page:
            return None

        soup = self.prepare_soup(page)

        info_block = soup.find("div", {"id": "lyrics-info"})
        if info_block is None:
            # unknown songs redirect to the main page, which lacks this block
            return None

        song_title = info_block.find("h1").text.replace(" Lyrics", "")
        song_artist = info_block.find("a").text

        text = self.parse_verse_block(soup.find("div", {"id": "lyrics"}))

        # stub pages carry a placeholder sentence instead of real lyrics
        is_stub = (
            text == "We do not have the complete song's lyrics just yet."
            or text.startswith('Shortcut to '))
        if is_stub:
            return None

        return Song(song_artist, song_title, self.sanitize_lyrics([text]))
예제 #2
0
    def search_song(self, artist, title):
        """Fetch a song from 1songlyrics.com through its direct URL.

        The URL has the form ``/<first char of artist>/<artist>/<title>.html``.
        Returns a ``Song``, or ``None`` when nothing was downloaded or the
        page title is the one of the site's main page.
        """
        # characters are passed as they are - the site handles redirects
        url_parts = [
            self.prepare_url_parameter(part)
            for part in (artist[0], artist, title)
        ]
        link = 'http://www.1songlyrics.com/{}/{}/{}.html'.format(*url_parts)

        page = self.download_webpage_text(link)
        if not page:
            return None

        soup = self.prepare_soup(page)
        if soup.html.title.text == '1 Song Lyrics':
            # redirected to the main page: the song does not exist
            return None

        wrapper = soup.find('div', {'id': 'wrapper'})
        headings = wrapper.findAll('h2', recursive=False)

        song_artist = headings[1].text
        # the last 12 characters of the title heading are cut off
        song_title = headings[0].text[:-12]

        # the lyrics live in the second class-less paragraph
        paragraphs = soup.findAll('p', {'class': False})
        text = self.parse_verse_block(paragraphs[1])

        return Song(song_artist, song_title, self.sanitize_lyrics([text]))
예제 #3
0
    def search_song(self, artist, title):
        """Fetch a song from nitrolyrics.com through its direct lyrics URL.

        Args:
            artist: Artist name used to build the URL.
            title: Song title used to build the URL.

        Returns:
            A ``Song`` with the artist, title and lyrics scraped from the
            page, or ``None`` when nothing was downloaded or the page has no
            ``div.lyric`` header block.
        """
        to_delete = ['.', '(', ')', "'", ',', '.', '?', '-']
        to_replace = [' ', ' & ']

        link = 'http://www.nitrolyrics.com/{}_{}-lyrics.html'.format(
            self.prepare_url_parameter(artist, to_delete, to_replace),
            self.prepare_url_parameter(title, to_delete, to_replace))

        page = self.download_webpage(link)
        if not page:
            return None

        soup = self.prepare_soup(page)

        # The attribute filter has to be a dict. Assuming prepare_soup returns
        # a BeautifulSoup tree, a set such as {'class', 'lyric'} is treated as
        # a list of candidate class names and would also match class="class".
        head_pane = soup.find('div', {'class': 'lyric'})
        if head_pane is None:
            # empty page - song not found
            return None

        song_artist = head_pane.find('a').text
        song_title = head_pane.find('h1').text.replace(' Lyrics', '')

        lyric_pane = soup.find('div', {'class': 'lyricContent'})
        lyric = self.parse_verse_block(lyric_pane.find('p'))

        return Song(song_artist, song_title, self.sanitize_lyrics([lyric]))
예제 #4
0
    def search_song(self, artist, title):
        """Search uta-net.com by title and return the first usable match.

        Each row of the result table is compared against the requested artist
        and title with ``compare_strings``. For a matching row the song page
        is downloaded and its ``kashi_area`` block is parsed. Returns a
        ``Song``, or ``None`` when no row produced lyrics.
        """
        query = self.prepare_url_parameter(title, delimiter='+')
        link = 'https://www.uta-net.com/search/?Aselect=2&Keyword={}'.format(
            query)

        page = self.download_webpage(link)
        if not page:
            return None

        result_table = self.prepare_soup(page).find('tbody')
        for row in result_table.findAll('tr', recursive=False):
            cells = row.findAll('td', recursive=False)

            song_artist = cells[1].a.text
            song_title = cells[0].a.text
            # third path segment of the row link is the song id
            song_id = cells[0].a['href'].split('/')[2]

            is_match = (self.compare_strings(artist, song_artist)
                        and self.compare_strings(title, song_title))
            if not is_match:
                continue

            song_page = self.download_webpage_text(
                'https://www.uta-net.com/song/{}/'.format(song_id))
            if not song_page:
                continue

            lyric_pane = self.prepare_soup(song_page).find(
                'div', {'id': 'kashi_area'})
            if lyric_pane:
                text = self.parse_verse_block(lyric_pane)
                return Song(song_artist, song_title,
                            self.sanitize_lyrics([text]))

        return None
예제 #5
0
    def search_song(self, artist, title):
        """Find a song on lyrics.com by walking the artist's page.

        The artist page is downloaded first. Every link whose href matches
        ``lyric/<digits>`` is compared with the requested title, and the
        first matching link whose page can be downloaded is parsed. Returns a
        ``Song`` or ``None``.
        """
        artist_url = 'http://www.lyrics.com/artist/{}/'.format(
            self.prepare_url_parameter(artist))

        artist_page = self.download_webpage(artist_url)
        if not artist_page:
            return None

        artist_soup = self.prepare_soup(artist_page)

        name_block = artist_soup.find('p', {'class': 'artist'})
        if not name_block:
            # no artist block: we were redirected to the search page
            return None

        song_artist = name_block.a.text

        song_anchors = artist_soup.findAll('a', href=re.compile('lyric/[0-9]+'))
        for anchor in song_anchors:
            song_title = anchor.text
            if not self.compare_strings(title, song_title):
                continue

            song_page = self.download_webpage(
                'http://www.lyrics.com' + anchor['href'])
            if not song_page:
                continue

            body = self.prepare_soup(song_page).find(
                'pre', {'id': 'lyric-body-text'})
            text = self.parse_verse_block_custom(body)

            return Song(song_artist, song_title, self.sanitize_lyrics([text]))

        return None
예제 #6
0
    def search_song(self, artist, title):
        """Fetch a song from metrolyrics.com through its direct lyrics URL.

        All ``p.verse`` blocks inside ``lyrics-body-text`` are parsed and
        concatenated, each followed by a blank line. The returned ``Song``
        uses the ``artist`` and ``title`` arguments as given. ``None`` is
        returned when nothing was downloaded, the lyrics block is missing,
        or the collected text is blank.
        """
        url_opts = {'to_delete': ["'", '(', ')'], 'to_replace': [' ']}
        url_title = self.prepare_url_parameter(title, **url_opts)
        url_artist = self.prepare_url_parameter(artist, **url_opts)
        link = "http://www.metrolyrics.com/{}-lyrics-{}.html".format(
            url_title, url_artist)

        page = self.download_webpage(link)
        if not page:
            return None

        body = self.prepare_soup(page).find("div", {"id": "lyrics-body-text"})
        if body is None:
            # song not found
            return None

        text = ''.join(
            self.parse_verse_block(verse) + '\n\n'
            for verse in body.findAll("p", {"class": "verse"}))

        if not text.strip():
            # blank text means the site only has an empty stub page
            return None

        return Song(artist, title, self.sanitize_lyrics([text]))
예제 #7
0
    def search_song(self, artist, title):
        """Search j-lyric.net and scrape the first entry of the result list.

        The first result is used as-is; it is not compared against the
        requested artist or title.

        Args:
            artist: Artist name sent as the ``ka`` query parameter.
            title: Song title sent as the ``kt`` query parameter.

        Returns:
            A ``Song`` with the artist and title shown in the result list and
            the lyrics of the linked page, or ``None`` when the search page
            or the lyric page could not be downloaded or the search page has
            no ``div.bdy`` result block.
        """
        link = "http://search.j-lyric.net/index.php?kt={}&ct=0&ka={}&ca=0".format(
            self.prepare_url_parameter(title, delimiter='+'),
            self.prepare_url_parameter(artist))

        page = self.download_webpage(link)
        if not page:
            return None

        soup = self.prepare_soup(page)

        lyric_list_pane = soup.find('div', {'class': 'bdy'})
        if lyric_list_pane is None:
            # no result block on the search page
            return None

        artist_pane = lyric_list_pane.find('p', {'class': 'sml'}).find('a')
        song_artist = artist_pane.text

        title_pane = lyric_list_pane.find('p', {'class': 'mid'}).find('a')
        song_title = title_pane.text
        song_link = title_pane['href']

        lyric_page = self.download_webpage(song_link)
        if not lyric_page:
            # the second request can fail just like the first one; do not
            # hand an empty page to the parser
            return None

        soup = self.prepare_soup(lyric_page)

        lyric_pane = soup.find('p', {'id': 'Lyric'})
        lyric = self.parse_verse_block(lyric_pane)

        return Song(song_artist, song_title, self.sanitize_lyrics([lyric]))
예제 #8
0
    def search_song(self, artist, title):
        """Fetch a song from lyricsaction.com through its direct URL.

        The URL has the form
        ``/<first char of artist>/<artist>/<title>-lyrics.htm``.

        Args:
            artist: Artist name used to build the URL.
            title: Song title used to build the URL.

        Returns:
            A ``Song`` with artist and title taken from the page heading, or
            ``None`` when nothing was downloaded, the ``div.node`` block is
            missing, or the heading is not of the form
            ``<artist> lyrics - <title>``.
        """
        # it seems that they can handle any symbol in link and redirect it
        # to the correct page, so there is no need to replace or delete
        # anything
        link = 'http://www.lyricsaction.com/{}/{}/{}-lyrics.htm'.format(
            self.prepare_url_parameter(artist[0]),
            self.prepare_url_parameter(artist),
            self.prepare_url_parameter(title))

        page = self.download_webpage_text(link)
        if not page:
            return None

        soup = self.prepare_soup(page)

        # The attribute filter has to be a dict. Assuming prepare_soup returns
        # a BeautifulSoup tree, a set such as {'class', 'node'} is treated as
        # a list of candidate class names and would also match class="class".
        main_pane = soup.find('div', {'class': 'node'})
        if main_pane is None:
            # page layout without the song block - nothing to parse
            return None

        title_pane = main_pane.find('h2', recursive=False)
        title_parts = title_pane.text.split(' lyrics - ', 2)

        if len(title_parts) < 2:
            # song not found, redirected to main page
            return None

        song_artist = title_parts[0]
        song_title = title_parts[1]

        lyrics_pane = main_pane.find('div', {'class': 'entry'},
                                     recursive=False).p
        lyrics = self.parse_verse_block(lyrics_pane)

        return Song(song_artist, song_title,
                    self.sanitize_lyrics([lyrics]))
예제 #9
0
    def search_song(self, artist, title):
        """Fetch a song from lyrics.snakeroot.ru through its direct URL.

        The URL has the form
        ``/<upper-cased first char>/<artist>/<artist lower>_<title lower>.html``.
        The lyrics are taken from the last paragraph of the ``content`` block
        that contains any text. Returns a ``Song`` or ``None``.
        """
        url_opts = {
            'to_delete': ['.', ',', '!', '?', '(', ')', '~', '/', "'", '"'],
            'to_replace': [' '],
            'delimiter': '_',
        }
        letter = self.prepare_url_parameter(artist[0].upper())
        artist_dir = self.prepare_url_parameter(artist, **url_opts)
        artist_file = self.prepare_url_parameter(artist, **url_opts).lower()
        title_file = self.prepare_url_parameter(title, **url_opts).lower()
        link = 'http://lyrics.snakeroot.ru/{}/{}/{}_{}.html'.format(
            letter, artist_dir, artist_file, title_file)

        # the site answers with 404 if the song is not found
        page = self.download_webpage_text(link)
        if not page:
            return None

        content = self.prepare_soup(page).find('div', {'id': 'content'})

        heading = content.find('h2', recursive=False)
        song_artist = heading.a.text
        # the title follows the artist link; its first 3 characters are cut
        song_title = heading.a.next_sibling[3:]

        # walk the paragraphs backwards and take the first one with text
        for paragraph in reversed(content.findAll('p')):
            if paragraph.text.strip() == '':
                continue
            text = self.parse_verse_block(paragraph)
            return Song(song_artist, song_title, self.sanitize_lyrics([text]))

        return None
예제 #10
0
    def search_song(self, artist, title):
        """Fetch a song from lyricsreg.com through its direct lyrics URL.

        ``' & '`` in artist and title is spelled out as ``' and '`` before
        the URL is built. Artist and title of the returned ``Song`` come from
        the page's ``content-subhead`` heading. Returns ``None`` when nothing
        was downloaded.
        """
        stripped_chars = ['(', ')', "'", ',']
        url_artist = self.prepare_url_parameter(
            artist.replace(' & ', ' and '), to_delete=stripped_chars)
        url_title = self.prepare_url_parameter(
            title.replace(' & ', ' and '), to_delete=stripped_chars)
        link = 'http://www.lyricsreg.com/lyrics/{}/{}/'.format(
            url_artist, url_title)

        page = self.download_webpage(link)
        if not page:
            return None

        content = self.prepare_soup(page).find('section',
                                               {'id': 'maincontent'})

        heading = content.find('h2', {'class': 'content-subhead'})
        heading_parts = heading.text.split(' lyrics : ')
        song_artist = heading_parts[0]
        # first and last character of the title part are dropped
        song_title = heading_parts[1][1:-1]

        centered = content.find('div', {'style': 'text-align:center'},
                                recursive=False)
        text = self.parse_verse_block(centered)

        return Song(song_artist, song_title, self.sanitize_lyrics([text]))
예제 #11
0
    def search_song(self, artist, title):
        """Fetch a song from lyricshuddle.com through its direct URL.

        Artist and title of the returned ``Song`` are read from the third and
        fourth link of the ``location`` block. Returns ``None`` when nothing
        was downloaded or the page text is exactly the site's
        "Impossible to find lyrics." message.
        """
        stripped_chars = ['!', '?', '(', ')', "'", '"', ',']
        replaced_chars = [' ', ' & ', ' / ', '/', ':']

        letter = self.prepare_url_parameter(artist[0])
        url_artist = self.prepare_url_parameter(artist, stripped_chars,
                                                replaced_chars)
        url_title = self.prepare_url_parameter(title, stripped_chars,
                                               replaced_chars)
        link = "http://www.lyricshuddle.com/{}/{}/{}.html".format(
            letter, url_artist, url_title)

        page = self.download_webpage(link)
        if not page:
            return None

        soup = self.prepare_soup(page)
        if soup.text == 'Impossible to find lyrics.':
            return None

        crumbs = soup.find("div", {"class": "location"}).findAll("a")
        song_artist = crumbs[2].text
        song_title = crumbs[3].text

        text_block = soup.find("div", {"class": "lyricstext"})
        # nested div/style tags are removed before the text is extracted
        self.remove_tags_from_block(text_block, ['div', 'style'])
        text = self.parse_verse_block(text_block)

        return Song(song_artist, song_title, self.sanitize_lyrics([text]))
예제 #12
0
    def search_song(self, artist, title):
        """Fetch a song from lyricsmania.com through its direct lyrics URL.

        The lyrics are assembled from two pieces: the ``fb-quotable`` block
        and the ``p402_premium`` block nested inside it, joined by a blank
        line. Returns ``None`` when nothing was downloaded or the page has no
        ``lyrics-body`` block.
        """
        url_opts = {
            'to_delete': ['.', "'", '?', '(', ')'],
            'to_replace': [' ', ' & '],
            'delimiter': '_',
        }
        url_title = self.prepare_url_parameter(title, **url_opts)
        url_artist = self.prepare_url_parameter(artist, **url_opts)
        link = 'http://www.lyricsmania.com/{}_lyrics_{}.html'.format(
            url_title, url_artist)

        page = self.download_webpage(link)
        if not page:
            return None

        soup = self.prepare_soup(page)

        body = soup.find('div', {'class': 'lyrics-body'})
        if body is None:
            # unknown songs redirect to the main page, which has no lyric pane
            return None
        quotable = body.find('div', {'class': 'fb-quotable'})

        # artist and title come from the navigation header
        nav = soup.find('div', {'class': 'lyrics-nav'})
        song_title = re.sub(' lyrics$', '', nav.find('h2').text)
        song_artist = nav.find('h3').text

        # part of the lyrics placed to the left of the video block
        first_part = self.parse_verse_block(quotable)

        # part of the lyrics placed below the video block
        premium = quotable.find('div', {'class': 'p402_premium'})
        second_part = self.parse_verse_block(premium)

        combined = first_part + '\n\n' + second_part
        return Song(song_artist, song_title, self.sanitize_lyrics([combined]))
예제 #13
0
    def search_song(self, artist, title):
        """Fetch a song from 1musiclyrics.net through its direct URL.

        The URL has the form ``/<first char of artist>/<artist>/<title>.html``.
        Returns a ``Song``, or ``None`` when nothing was downloaded or the
        page title is the one of the site's main page.
        """
        url_opts = {
            'to_replace': [
                '.', ',', "'", '!', '?', '&', '(', ')', '-', '>', '/', ' '
            ],
            'delimiter': '_',
        }
        letter = self.prepare_url_parameter(artist[0])
        url_artist = self.prepare_url_parameter(artist, **url_opts)
        url_title = self.prepare_url_parameter(title, **url_opts)
        link = 'http://www.1musiclyrics.net/{}/{}/{}.html'.format(
            letter, url_artist, url_title)

        # the site answers with 404 if the song is not found
        page = self.download_webpage_text(link)
        if not page:
            return None

        soup = self.prepare_soup(page)
        if soup.html.title.text == '1 Music Lyrics':
            # redirected to the main page: the song does not exist
            return None

        box = soup.find('div', {'id': 'welcomeBox'})

        headings = box.findAll('h1', {'id': 'welcomeText'}, recursive=False)
        song_artist = headings[1].text
        # the last 13 characters of the title heading are cut off
        song_title = headings[0].text[:-13]

        paragraph = box.find('p', {'class': False}, recursive=False)
        text = self.parse_verse_block(paragraph)

        return Song(song_artist, song_title, self.sanitize_lyrics([text]))
예제 #14
0
    def search_song(self, artist, title):
        """Fetch a song from musixmatch.com through its direct lyrics URL.

        The lyrics are the texts of all ``p.mxm-lyrics__content`` paragraphs
        inside ``div.mxm-lyrics``, each followed by a newline. Returns a
        ``Song``, or ``None`` when nothing was downloaded.
        """
        url_opts = {
            'to_delete': ['!', '"', '(', ')'],
            'to_replace': [' ', '.', "'", ' + '],
        }
        url_artist = self.prepare_url_parameter(artist, **url_opts)
        url_title = self.prepare_url_parameter(title, **url_opts)
        link = 'https://www.musixmatch.com/lyrics/{}/{}'.format(
            url_artist, url_title)

        # the site answers with 404 if the song is not found
        page = self.download_webpage_text(link)
        if not page:
            return None

        soup = self.prepare_soup(page)

        track_header = soup.find('div', {'class': 'mxm-track-title'})
        song_artist = track_header.find('a').text

        heading = track_header.find('h1', recursive=False)
        # <small> children are removed before the title text is read
        self.remove_tags_from_block(heading, ['small'])
        song_title = heading.text

        lyrics_root = soup.find('div', {'class': 'mxm-lyrics'})
        text = ''.join(
            chunk.text + '\n'
            for chunk in lyrics_root.findAll(
                'p', {'class': 'mxm-lyrics__content'}))

        return Song(song_artist, song_title, self.sanitize_lyrics([text]))
예제 #15
0
    def get_song(self, page):
        """Build a ``Song`` from an already downloaded lyric page.

        Artist and title are each looked up in a table-based layout first
        and in a div-based layout otherwise. Every ``div`` with an ``id``
        inside ``langtabs`` contributes one lyric text, except blocks whose
        id starts with ``Details`` or ends with ``-tv``.
        """
        soup = self.prepare_soup(page)

        artist_table = soup.find('table', {'class': 'imagetabletitle'})
        if not artist_table:
            artist_wrap = soup.find('div', {'class': 'artistlyricblocknew'})
            artist_node = artist_wrap.find('div', {'class': 'artisttext1new'})
            song_artist = artist_node.get_text().strip()
        else:
            artist_node = artist_table.find('div', {'class': 'titletextpage'})
            song_artist = artist_node.get_text()

        title_table = soup.find('table', {'class': 'songtitle'})
        if not title_table:
            title_wrap = soup.find('div', {'class': 'titlelyricblocknew'})
            title_node = title_wrap.find('div', {'class': 'titletext1new'})
            song_title = title_node.get_text().strip()
        else:
            title_node = title_table.find('div', {'class': 'titletextpage'})
            song_title = title_node.get_text()

        tabs = soup.find('div', {'id': 'langtabs'})

        def is_lyric_block(block_id):
            # 'Details' holds song info, '*-tv' blocks are tv-size versions
            return not (block_id.startswith('Details')
                        or block_id.endswith('-tv'))

        # the blocks are visited in reverse so that the original japanese
        # lyric ends up first
        lyrics = [
            self.parse_verse_block(block)
            for block in reversed(tabs.findAll('div', id=True))
            if is_lyric_block(block['id'])
        ]

        return Song(song_artist, song_title, self.sanitize_lyrics(lyrics))
예제 #16
0
    def search_song(self, artist, title):
        """Search kget.jp and scrape the first entry of the result list.

        The first result is used as-is; it is not compared against the
        requested artist or title.

        Args:
            artist: Artist name sent as the ``r`` query parameter.
            title: Song title sent as the ``t`` query parameter.

        Returns:
            A ``Song`` with the artist and title shown in the result list and
            the lyrics of the linked page, or ``None`` when the search page
            or the lyric page could not be downloaded or the result list is
            empty.
        """
        link = 'http://www.kget.jp/search/index.php?r={}&t={}'.format(
            self.prepare_url_parameter(artist, delimiter='+'),
            self.prepare_url_parameter(title, delimiter='+'))

        page = self.download_webpage(link)
        if not page:
            return None

        soup = self.prepare_soup(page)

        lyric_list_pane = soup.find('div', {'class': 'title-wrap cf'})
        if lyric_list_pane is None:
            # empty search result list
            return None

        artist_pane = lyric_list_pane.find('p', {'class': 'artist'}).find('a')
        song_artist = artist_pane.text

        title_pane = lyric_list_pane.find('a', {'class': 'lyric-anchor'})
        song_title = title_pane.find('h2', {'class': 'title'}).text
        song_link = 'http://www.kget.jp' + title_pane['href']

        lyric_page = self.download_webpage(song_link)
        if not lyric_page:
            # the second request can fail just like the first one; do not
            # hand an empty page to the parser
            return None

        soup = self.prepare_soup(lyric_page)

        lyric_pane = soup.find('div', {'id': 'lyric-trunk'})
        lyric = self.parse_verse_block(lyric_pane)

        return Song(song_artist, song_title, self.sanitize_lyrics([lyric]))
예제 #17
0
    def search_song(self, artist, title):
        """Search songs-lyrics.net and return the first usable match.

        Every ``td.text`` cell of the result table is compared against the
        requested artist and title. For a match the song page is downloaded
        after a two second pause and the second direct child ``div`` of
        ``div.row.panels`` is parsed. Returns a ``Song`` or ``None``.
        """
        link = 'http://www.songs-lyrics.net/lyrics-search.php?ar={}&so={}&submit=Search'.format(
            self.prepare_url_parameter(artist),
            self.prepare_url_parameter(title))

        page = self.download_webpage(link)
        if not page:
            return None

        results = self.prepare_soup(page).find('table', {'class': 'table2'})
        for cell in results.findAll('td', {'class': 'text'}):
            anchors = cell.findAll('a', recursive=False)

            song_artist = anchors[0].text
            # the last 7 characters of the link text are cut off
            song_title = anchors[2].text[:-7]
            song_link = 'http://www.songs-lyrics.net' + anchors[2]['href']

            is_match = (self.compare_strings(artist, song_artist)
                        and self.compare_strings(title, song_title))
            if not is_match:
                continue

            # two consecutive requests trigger a "too many requests from
            # your ip" error, so wait before the second one
            time.sleep(2)
            song_page = self.download_webpage_text(song_link)
            if not song_page:
                continue

            panels = self.prepare_soup(song_page).find(
                'div', {'class': 'row panels'})
            lyric_panel = panels.findAll('div', recursive=False)[1]
            text = self.parse_verse_block(
                lyric_panel, tags_to_skip=['div', 'table', 'h5'])

            return Song(song_artist, song_title, self.sanitize_lyrics([text]))

        return None
예제 #18
0
    def search_song(self, artist, title):
        """Fetch a song from alphabetlyrics.com through its direct URL.

        The lyrics are the stripped texts of all direct ``div`` and ``br``
        children of the second ``div.lyrics`` block, each followed by a
        newline. Returns a ``Song``, or ``None`` when nothing was downloaded.
        """
        url_opts = {
            'to_delete': ['.', ',', "'", '?', '/', '(', ')', '!'],
            'to_replace': [' '],
            'delimiter': '_',
        }
        url_artist = self.prepare_url_parameter(artist, **url_opts)
        url_title = self.prepare_url_parameter(title, **url_opts)
        link = 'http://alphabetlyrics.com/lyrics/{}/{}.html'.format(
            url_artist, url_title)

        # the site answers with 404 if the song is not found
        page = self.download_webpage_text(link)
        if not page:
            return None

        soup = self.prepare_soup(page)

        breadcrumb = soup.find('div', {'class': 'songlist bglyric2'})
        song_artist = breadcrumb.findAll('a', recursive=False)[1].text
        song_title = breadcrumb.find('b', recursive=False).text

        lyrics_block = soup.findAll('div', {'class': 'lyrics'})[1]
        text = ''.join(
            node.text.strip() + '\n'
            for node in lyrics_block.findAll(['div', 'br'], recursive=False))

        return Song(song_artist, song_title, self.sanitize_lyrics([text]))
예제 #19
0
    def search_song(self, artist, title):
        """Fetch a song from utaten.com through its direct lyric URL.

        Title and artist of the returned ``Song`` are read from the children
        of the lyric title heading. Returns ``None`` when nothing was
        downloaded or the title block is missing.
        """
        url_artist = self.prepare_url_parameter(artist)
        url_title = self.prepare_url_parameter(title)
        link = 'http://utaten.com/lyric/{}/{}/'.format(url_artist, url_title)

        page = self.download_webpage_text(link)
        if not page:
            return None

        main = self.prepare_soup(page).find('main')

        title_box = main.find(
            'div',
            {'class': 'contentBox__title contentBox__title--lyricTitle'})
        if not title_box:
            # redirected to the main page: the song does not exist
            return None

        heading_nodes = title_box.h1.contents
        # first child is the title text; its first and last characters
        # are dropped after stripping
        song_title = heading_nodes[0].strip()[1:-1]
        # fourth child holds the artist
        song_artist = heading_nodes[3].text.strip()

        lyric_body = main.find('div', {'class': 'lyricBody'})
        text = self.parse_verse_block(
            lyric_body.find('div', {'class': 'medium'}))

        return Song(song_artist, song_title, self.sanitize_lyrics([text]))
예제 #20
0
    def search_song(self, artist, title):
        """Fetch a song from songlyrics.com through its direct lyrics URL.

        Artist and title are lower-cased before the URL is built. The song
        title is extracted from the page heading, which is expected to look
        like ``<something> - <title> Lyrics``. Returns a ``Song``, or
        ``None`` when nothing was downloaded.
        """
        url_opts = {
            'to_delete': [',', '(', ')', '&', '"', '?'],
            'to_replace': [' ', "'", '.'],
            'delimiter': '-',
        }
        url_artist = self.prepare_url_parameter(artist.lower(), **url_opts)
        url_title = self.prepare_url_parameter(title.lower(), **url_opts)
        link = 'http://www.songlyrics.com/{}/{}-lyrics/'.format(
            url_artist, url_title)

        page = self.download_webpage(link)
        if not page:
            return None

        soup = self.prepare_soup(page)

        page_title = soup.find('div', {'class': 'pagetitle'})
        song_artist = page_title.find('p', recursive=False).a.text
        heading_text = page_title.find('h1', recursive=False).text
        song_title = re.findall('.*? - (.*?) Lyrics', heading_text)[0]

        # the pane may contain a hidden image, which would otherwise leave a
        # double newline in its place
        text = self.parse_verse_block(
            soup.find('p', {'id': 'songLyricsDiv'}), tags_to_skip=['img'])

        return Song(song_artist, song_title, self.sanitize_lyrics([text]))
예제 #21
0
    def search_song(self, artist, title):
        """Search kashinavi.com by title and return the first usable match.

        Every result link is compared against the requested artist and title
        with ``compare_strings``. For a match the lyric text is downloaded
        from ``/s/kashi.php`` using the query string of the result link as
        the song id.

        Args:
            artist: Artist name to match against the result rows.
            title: Song title; sent as the search term, quoted as cp932.

        Returns:
            A ``Song`` for the first matching row whose lyric page could be
            downloaded, otherwise ``None``.
        """
        link = 'http://kashinavi.com/search.php?r=kyoku&search={}'.format(
            self.prepare_url_parameter(title,
                                       delimiter='+',
                                       quote_encoding='cp932'))

        page = self.download_webpage_text(link, 'shift_jis')
        if not page:
            return None

        soup = self.prepare_soup(page)

        # raw string: '\?' and '\d' are regex escapes, not string escapes
        song_href = re.compile(r'song_view.html\?\d?')
        for anchor in soup.findAll('a', href=song_href, text=True):
            song_artist = anchor.parent.find_next_sibling('td').text
            song_title = anchor.text
            song_link = anchor['href']

            if not (self.compare_strings(artist, song_artist)
                    and self.compare_strings(title, song_title)):
                continue

            # the part after '?' in the result link is the song id
            song_id = song_link.split('?', 2)[1]
            lyric_link = 'http://kashinavi.com/s/kashi.php?no={}'.format(
                song_id)

            lyric_page = self.download_webpage_text(lyric_link, 'shift_jis')
            if lyric_page:
                # keep what lies between the first '>' and the last '<' and
                # turn <br> tags into newlines
                start = lyric_page.index('>') + 1
                end = lyric_page.rfind('<')
                lyrics = lyric_page[start:end].replace('<br>', '\n')
                return Song(song_artist, song_title,
                            self.sanitize_lyrics([lyrics]))

        return None
예제 #22
0
    def search_song(self, artist, title):
        """Fetch a song from absolutelyrics.com through its direct URL.

        Args:
            artist: Artist name used to build the URL.
            title: Song title used to build the URL.

        Returns:
            A ``Song`` whose artist and title are read from the ``nav``
            block, or ``None`` when nothing was downloaded, the ``nav`` block
            is missing, or it has fewer than two direct links.
        """
        to_delete = ['?']
        link = 'http://www.absolutelyrics.com/lyrics/view/{}/{}'.format(
            self.prepare_url_parameter(artist,
                                       to_delete=to_delete,
                                       delimiter='_'),
            self.prepare_url_parameter(title,
                                       to_delete=to_delete,
                                       delimiter='_'))

        page = self.download_webpage_text(link)
        if not page:
            return None

        soup = self.prepare_soup(page)

        nav_pane = soup.find('div', {'id': 'nav'})
        if not nav_pane:
            # song not found, we're redirected to search page which will not
            # work without javascript
            return None

        nav_a_panes = nav_pane.findAll('a', recursive=False)

        # Check the list that is indexed below, not the nav block itself:
        # the second link is the artist link, so fewer than two links means
        # the song was not found and we're redirected to the search page.
        # todo: process search suggestions
        if len(nav_a_panes) < 2:
            return None

        song_artist = nav_a_panes[1].text.replace(' Lyrics', '')
        # the title is the text node after the artist link; its first two
        # characters are dropped after stripping
        song_title = nav_a_panes[1].next_sibling.strip().replace(
            ' Lyrics', '')[2:]

        lyrics_pane = soup.find('p', {'id': 'view_lyrics'})
        lyrics = self.parse_verse_block(lyrics_pane)

        return Song(song_artist, song_title,
                    self.sanitize_lyrics([lyrics]))
예제 #23
0
    def search_song(self, artist, title):
        """Fetch a song from genius.com through its direct lyrics URL.

        Artist and title are lower-cased before the URL is built. Returns a
        ``Song`` with the artist, title and lyrics scraped from the page, or
        ``None`` when nothing was downloaded.
        """
        url_opts = {
            'to_delete': ["'", '(', ')', '.', '?'],
            'to_replace': [' '],
        }
        url_artist = self.prepare_url_parameter(artist.lower(), **url_opts)
        url_title = self.prepare_url_parameter(title.lower(), **url_opts)
        link = 'https://genius.com/{}-{}-lyrics'.format(url_artist, url_title)

        page = self.download_webpage(link)
        if not page:
            return None

        soup = self.prepare_soup(page)

        artist_link = soup.find(
            'a',
            {'class': 'header_with_cover_art-primary_info-primary_artist'})
        song_artist = artist_link.text.strip()

        heading = soup.find(
            'h1', {'class': 'header_with_cover_art-primary_info-title'})
        song_title = heading.text.strip()

        # the lyrics are the first paragraph of the 'lyrics' block
        lyrics_block = soup.find('div', {'class': 'lyrics'})
        text = self.parse_verse_block(lyrics_block.find('p'))

        return Song(song_artist, song_title, self.sanitize_lyrics([text]))
예제 #24
0
    def search_song(self, artist, title):
        """Find a song through a Google custom search and scrape its page.

        Only the first search result is considered. It is accepted when its
        title matches ``'<title> <artist> - 歌詞タイム'`` according to
        ``compare_strings``.

        Args:
            artist: Artist name; second part of the search query.
            title: Song title; first part of the expected result title.

        Returns:
            A ``Song`` with artist and title read from the lyric page, or
            ``None`` when a download failed, the response has no results,
            the first result does not match, or no lyrics were extracted.
        """
        # NOTE(review): API key, signature and search engine id are
        # hard-coded credentials - consider moving them to configuration.
        link = 'https://www.googleapis.com/customsearch/v1element?key={}&rsz=filtered_cse&num=1&hl=ja&prettyPrint=true&source=gcsc&gss=.com&sig={}&cx={}&q={}%20{}'.format(
            'AIzaSyCVAXiUzRYsML1Pv6RwSG1gunmMikTzQqY',
            'af0b52154899cfe2ecd2e1ec788a43aa',
            '005893883342704476181:qa28d4ywjdg',
            self.prepare_url_parameter(artist),
            self.prepare_url_parameter(title))

        page = self.download_webpage_json(link)
        if not page:
            return None

        # a response without a 'results' key (e.g. an error payload) is
        # treated like an empty result list instead of raising KeyError
        results = page.get('results')
        if not results:
            return None

        expected_title = '{} {} - 歌詞タイム'.format(title, artist)
        if not self.compare_strings(results[0]['titleNoFormatting'],
                                    expected_title):
            return None

        lyric_link = results[0]['unescapedUrl']

        lyric_page = self.download_webpage_text(lyric_link)
        if not lyric_page:
            return None

        soup = self.prepare_soup(lyric_page)

        title_pane = soup.find('div',
                               {'class': 'person_list_and_other_contents'})
        song_title = title_pane.h1.text.strip()

        person_pane = title_pane.find('div', {'class': 'person_list'})
        song_artist = person_pane.find('a').text.strip()

        lyric = self.get_lyrics(lyric_page)
        if not lyric:
            return None

        return Song(song_artist, song_title, self.sanitize_lyrics([lyric]))
예제 #25
0
    def search_song(self, artist, title):
        """Find a song through the lyrics.wikia.com API and scrape its page.

        After the main lyric page, every link of the form
        ``/wiki/<page name>/...`` that has a ``title`` attribute is followed
        and its lyrics are appended (translations kept on separate pages).

        Args:
            artist: Artist name; ``&`` is left unquoted in the API request.
            title: Song title.

        Returns:
            A ``Song`` with artist and title as reported by the API, or
            ``None`` when the API request failed, the API answered
            ``'Not found'``, or the lyric page could not be downloaded.
        """
        # they don't understand quoted '&' as a delimiter between artists
        url = 'http://lyrics.wikia.com/api.php?action=lyrics&artist={}&song={}&fmt=realjson&func=getSong'.format(
            self.prepare_url_parameter(artist, safe_chars='&'),
            self.prepare_url_parameter(title))

        page = self.download_webpage_text(url)
        if not page:
            return None

        resp = json.loads(page)
        if resp['lyrics'] == 'Not found':
            return None

        song_artist = resp['artist']
        song_title = resp['song']

        lyric_url = resp['url']
        lyric_page = self.download_webpage(lyric_url)
        if not lyric_page:
            # do not hand an empty page to the parser
            return None

        soup = self.prepare_soup(lyric_page)

        lyrics = self.get_page_lyrics(soup)

        # load lyric translations from separate pages
        # eg: This song has been translated into these languages: Romanized, English.
        # the page name is the part after the first single '/' of the url
        page_name = re.split('(?<!/)/(?!/)', lyric_url, 2)[1]
        # the page name is literal text: escape it so that characters such as
        # '(' or '?' in a song name are not read as regex syntax
        translation_href = re.compile('/wiki/' + re.escape(page_name) + '/.*')
        for link in soup.findAll('a', {'title': True}, href=translation_href):
            translation_page = self.download_webpage_text(
                'http://lyrics.wikia.com' + link['href'])
            if not translation_page:
                # skip translations that could not be downloaded
                continue
            lyrics.extend(
                self.get_page_lyrics(self.prepare_soup(translation_page)))

        return Song(song_artist, song_title, self.sanitize_lyrics(lyrics))
예제 #26
0
    def search_song(self, artist, title):
        """Fetch a song page from showmelyrics.com and extract its lyrics.

        The page URL is built from the artist and title, each passed through
        ``prepare_url_parameter`` together with the ``to_delete`` and
        ``to_replace`` character lists.

        :param artist: artist name to look up
        :param title: song title to look up
        :return: a ``Song`` built from the artist/title shown on the page and
            the sanitized lyrics, or ``None`` when the page cannot be
            downloaded or does not have the expected structure
        """
        to_delete = ["'", '’', '.', ',', '!', '?', '[', ']', '(', ')', '*']
        to_replace = [' ', ' & ']
        link = 'http://showmelyrics.com/lyrics/{}-{}'.format(
            self.prepare_url_parameter(artist,
                                       to_delete=to_delete,
                                       to_replace=to_replace),
            self.prepare_url_parameter(title,
                                       to_delete=to_delete,
                                       to_replace=to_replace))

        page = self.download_webpage_text(link)
        # return 404 if song not found
        if not page:
            return None

        soup = self.prepare_soup(page)

        title_pane = soup.find('div', {'class': 'box-title-current'})
        if title_pane is None or title_pane.span is None:
            return None

        # Split on the first separator only, so a title that itself contains
        # ' – ' is kept whole instead of being cut at the second separator.
        title_parts = title_pane.span.text.strip().split(' – ', 1)
        if len(title_parts) != 2:
            return None
        song_artist, song_title = title_parts

        lyrics_pane = soup.find('div', {'id': 'lyrics'})
        if lyrics_pane is None:
            return None
        inner_lyrics_pane = lyrics_pane.find(
            'div', {'class': 'editable editable-content'}, recursive=False)
        if inner_lyrics_pane is None:
            return None

        # every paragraph is one verse; each verse is followed by a blank line
        lyrics = ''.join(
            self.parse_verse_block(elem) + '\n\n'
            for elem in inner_lyrics_pane.findAll('p', recursive=False))

        return Song(song_artist, song_title,
                    self.sanitize_lyrics([lyrics]))
예제 #27
0
    def search_song(self, artist, title):
        """Fetch a song page from lyricsplace.com and extract its lyrics.

        The URL has three path segments: the first character of the artist,
        the artist and the title, each passed through
        ``prepare_url_parameter``.

        :param artist: artist name to look up (its first character is used
            as the first path segment)
        :param title: song title to look up
        :return: a ``Song`` built from the breadcrumb texts and the sanitized
            lyrics, or ``None`` when the page cannot be downloaded or has no
            breadcrumbs list
        """
        cleanup = {
            'to_delete': ["'", '.', ',', '!', '?', '(', ')'],
            'to_replace': [' & ', ' / ', ' '],
        }
        initial_segment = self.prepare_url_parameter(artist[0])
        artist_segment = self.prepare_url_parameter(artist, **cleanup)
        title_segment = self.prepare_url_parameter(title, **cleanup)
        link = 'http://lyricsplace.com/songs/{}/{}/{}.html'.format(
            initial_segment, artist_segment, title_segment)

        # return 404 if song not found
        page = self.download_webpage_text(link)
        if not page:
            return None

        soup = self.prepare_soup(page)

        nav_pane = soup.find('ul', {'class': 'breadcrumbs'})
        if not nav_pane:
            # song not found (not all of them return 404)
            return None

        crumbs = nav_pane.findAll('li', recursive=False)
        artist_crumb = crumbs[2]
        # drop the fixed-length trailing text of each breadcrumb
        # (7 and 12 characters; presumably site-added suffixes, TODO confirm)
        song_artist = artist_crumb.a.text[:-7]
        title_crumb = crumbs[3]
        song_title = title_crumb.span.text[:-12]

        lyrics_pane = soup.find('div', {'class': 'twelve columns lyric'})
        lyrics = self.parse_verse_block(lyrics_pane, tags_to_skip=['h1'])

        sanitized = self.sanitize_lyrics([lyrics])
        return Song(song_artist, song_title, sanitized)
예제 #28
0
    def search_song(self, artist, title):
        """Search lyrsense.com by title and collect the lyric blocks of the hit.

        The search page is handed to ``parse_search_page`` together with the
        requested artist and title; its result is indexed as
        ``[0]`` song page link, ``[1]`` and ``[2]`` the values passed to
        ``Song``.

        :param artist: artist name used to pick the right search result
        :param title: song title used as the search query
        :return: a ``Song`` holding one lyric per ``<p>`` whose id matches
            ``.{2}_text``, or ``None`` when a download fails or no search
            result is selected
        """
        query = self.prepare_url_parameter(title)
        link = 'https://lyrsense.com/search?s={}'.format(query)

        search_page = self.download_webpage(link)
        if not search_page:
            return None

        search_soup = self.prepare_soup(search_page)
        song_info = self.parse_search_page(search_soup, artist, title)
        if not song_info:
            return None

        song_page = self.download_webpage_text(song_info[0])
        if not song_page:
            return None

        song_soup = self.prepare_soup(song_page)
        text_blocks = song_soup.findAll('p', id=re.compile('.{2}_text'))
        lyrics = [self.parse_verse_block(block) for block in text_blocks]

        return Song(song_info[1], song_info[2],
                    self.sanitize_lyrics(lyrics))
예제 #29
0
    def search_song(self, artist, title):
        """Search darklyrics.com and extract the lyrics of the matching song.

        Search results are links of the form ``lyrics/<..>/<..>html#<id>``.
        The first link whose text compares equal (via ``compare_strings``) to
        ``'<artist> - <title>'`` and whose page downloads successfully is
        used; the ``<id>`` fragment selects the verse block on that page.

        :param artist: artist name to search for
        :param title: song title to search for
        :return: a ``Song`` with the artist/title taken from the link text and
            the sanitized lyrics, or ``None`` when nothing usable is found
        """
        link = 'http://www.darklyrics.com/search?q={}%20{}'.format(
            self.prepare_url_parameter(artist),
            self.prepare_url_parameter(title))

        page = self.download_webpage(link)
        if not page:
            return None

        soup = self.prepare_soup(page)

        main_pane = soup.find('div', {'class': 'cont'})
        if main_pane is None:
            return None

        full_title = '{} - {}'.format(artist, title)

        for item in main_pane.findAll(
                'a', {'target': '_blank'},
                href=re.compile('lyrics/.*/.*html#[0-9]+')):

            if not self.compare_strings(item.text, full_title):
                continue

            # re.match is anchored at the start of the href, unlike the filter
            # pattern above, so a link can pass the filter and still not match
            mobj = re.match('lyrics/.*/.*html#(?P<id>.*)', item['href'])
            if mobj is None:
                continue
            song_id = mobj.group('id')

            # Split on the first ' - ' only: a title that contains the
            # separator must stay in one piece instead of breaking the
            # two-way unpacking.
            song_artist, separator, song_title = item.text.partition(' - ')
            if not separator:
                continue

            song_link = 'http://www.darklyrics.com/' + item['href']
            song_page = self.download_webpage_text(song_link)
            if not song_page:
                # try the next matching search result
                continue

            song_soup = self.prepare_soup(song_page)
            lyrics_pane = song_soup.find('div', {'class': 'lyrics'})
            lyric = self.find_verse_block(lyrics_pane, song_id)

            return Song(song_artist, song_title,
                        self.sanitize_lyrics([lyric]))

        return None
예제 #30
0
    def search_song(self, artist, title):
        """Fetch a song page from allyrics.net and extract its lyrics.

        The page URL is built from the artist and title, each passed through
        ``prepare_url_parameter`` together with the ``to_delete`` and
        ``to_replace`` character lists. Artist and title are read from the
        third and fourth links of the ``sh_nav`` navigation bar.

        :param artist: artist name to look up
        :param title: song title to look up
        :return: a ``Song`` with the sanitized lyrics, or ``None`` when the
            page cannot be downloaded, the navigation bar is missing or too
            short, or the title read from it is empty (song not found)
        """
        to_delete = ['!', '?', '.', ',', '(', ')']
        to_replace = [' ', "'", ' & ']
        link = 'http://www.allyrics.net/{}/lyrics/{}/'.format(
            self.prepare_url_parameter(artist, to_delete=to_delete, to_replace=to_replace),
            self.prepare_url_parameter(title, to_delete=to_delete, to_replace=to_replace))

        page = self.download_webpage_text(link)
        if not page:
            return None

        soup = self.prepare_soup(page)

        # attribute filter must be a dict; the former {'class', 'sh_nav'} was
        # a set literal, not a filter on class="sh_nav"
        nav_bar = soup.find('div', {'class': 'sh_nav'})
        if nav_bar is None:
            return None

        nav_bar_parts = nav_bar.findAll('a', recursive=False)
        if len(nav_bar_parts) < 4:
            # navigation bar has no artist/title entries
            return None

        # drop the fixed-length 7-character tail of each link text
        # (presumably a site-added suffix, TODO confirm)
        song_artist = nav_bar_parts[2].text[:-7]
        song_title = nav_bar_parts[3].text[:-7].strip()

        if not song_title:
            # song not found
            return None

        lyrics_pane = soup.find('div', {'class': 'c_tl'})
        lyrics = self.parse_verse_block(lyrics_pane, tags_to_skip=['script'])

        return Song(song_artist, song_title, self.sanitize_lyrics([lyrics]))