def search_artist(self, artist_page, callback, *data):
    """Find the best-matching artist link in a DarkLyrics artist index page.

    The page is decoded as ISO-8859-1 and every ``<a href="...">`` that
    follows the ``tban.js`` marker is scored against ``self.artist_ascii``
    with ``stringmatch.string_match``.  The highest score that also exceeds
    ``min_artist_match`` (a threshold defined outside this method) wins.

    On success ``self.artist`` is set to the matched name (lower-cased,
    because the link section is lower-cased before parsing) and the artist
    URL is handed to ``rb.Loader().get_url`` with ``self.search_song`` as
    the next handler.

    Args:
        artist_page: Raw page contents, or None when no page is available.
        callback: Called as ``callback(None, *data)`` when no artist can be
            found; otherwise forwarded to ``self.search_song``.
        *data: Extra arguments passed through unchanged.
    """
    if artist_page is None:
        callback(None, *data)
        return

    artist_page = artist_page.decode('iso-8859-1')

    # Only the part of the page after the 'tban.js' marker is scanned.  A
    # page without the marker (error page, changed layout) is reported as
    # "not found" instead of raising IndexError and dropping the callback.
    _, marker, link_section = artist_page.partition('tban.js')
    if not marker:
        callback(None, *data)
        return

    pattern_link = '<a href="'
    pattern_artist = '([^"]*)">*([^<]*)<'

    best_match = ()
    smvalue_bestmatch = 0
    # The first chunk is the text before the first link, so it is skipped.
    for line in link_section.lower().split(pattern_link)[1:]:
        found = re.search(pattern_artist, line)
        if found is None:
            continue
        artist_link, artist_name = found.groups()
        # Absolute links point to other sites, not to an artist page here.
        if artist_link.startswith(('http:', 'https:')):
            continue
        artist_name = artist_name.strip()
        smvalue = stringmatch.string_match(artist_name, self.artist_ascii)
        if smvalue > min_artist_match and smvalue > smvalue_bestmatch:
            artist_url = 'http://www.darklyrics.com/%s' % (artist_link)
            best_match = (smvalue, artist_url, artist_name)
            smvalue_bestmatch = smvalue

    if not best_match:
        # No usable artist link: the lyrics are located in an external site
        # or the artist is not listed.
        callback(None, *data)
        return

    loader = rb.Loader()
    self.artist = best_match[2]
    loader.get_url(best_match[1], self.search_song, callback, *data)
def search_artist(self, artist_page, callback, *data):
    """Find the best-matching artist link in a DarkLyrics artist index page.

    The page is decoded as ISO-8859-1 and every ``<a href="...">`` that
    follows the ``tban.js`` marker is scored against ``self.artist_ascii``
    with ``stringmatch.string_match``.  The highest score that also exceeds
    ``min_artist_match`` (a threshold defined outside this method) wins.

    On success ``self.artist`` is set to the matched name (lower-cased,
    because the link section is lower-cased before parsing) and the artist
    URL is handed to ``rb.Loader().get_url`` with ``self.search_song`` as
    the next handler.

    Args:
        artist_page: Raw page contents, or None when no page is available.
        callback: Called as ``callback(None, *data)`` when no artist can be
            found; otherwise forwarded to ``self.search_song``.
        *data: Extra arguments passed through unchanged.
    """
    if artist_page is None:
        callback(None, *data)
        return

    artist_page = artist_page.decode('iso-8859-1')

    # Only the part of the page after the 'tban.js' marker is scanned.  A
    # page without the marker (error page, changed layout) is reported as
    # "not found" instead of raising IndexError and dropping the callback.
    _, marker, link_section = artist_page.partition('tban.js')
    if not marker:
        callback(None, *data)
        return

    pattern_link = '<a href="'
    pattern_artist = '([^"]*)">*([^<]*)<'

    best_match = ()
    smvalue_bestmatch = 0
    # The first chunk is the text before the first link, so it is skipped.
    for line in link_section.lower().split(pattern_link)[1:]:
        found = re.search(pattern_artist, line)
        if found is None:
            continue
        artist_link, artist_name = found.groups()
        # Absolute links point to other sites, not to an artist page here.
        if artist_link.startswith(('http:', 'https:')):
            continue
        artist_name = artist_name.strip()
        smvalue = stringmatch.string_match(artist_name, self.artist_ascii)
        if smvalue > min_artist_match and smvalue > smvalue_bestmatch:
            artist_url = 'http://www.darklyrics.com/%s' % (artist_link)
            best_match = (smvalue, artist_url, artist_name)
            smvalue_bestmatch = smvalue

    if not best_match:
        # No usable artist link: the lyrics are located in an external site
        # or the artist is not listed.
        callback(None, *data)
        return

    loader = rb.Loader()
    self.artist = best_match[2]
    loader.get_url(best_match[1], self.search_song, callback, *data)
def search_song(self, songlist, callback, *data):
    """Find the song matching ``self.title`` on a DarkLyrics artist page.

    The artist page lists all albums with links to each song's lyrics.  The
    page is decoded as ISO-8859-1 and the section after the ``LYRICS</h1>``
    heading is lower-cased and scanned for song links.  Each link title is
    compared (spaces removed) to ``self.title`` with
    ``stringmatch.string_match``.

    The FIRST title whose score exceeds ``min_song_match`` (a threshold
    defined outside this method) is taken; later links are not examined.
    On success ``self.title`` and ``self.titlenumber`` are updated from the
    match and the album page URL is handed to ``rb.Loader().get_url`` with
    ``self.parse_lyrics`` as the next handler.

    Args:
        songlist: Raw artist page contents, or None when no page is
            available.
        callback: Called as ``callback(None, *data)`` when no song can be
            found; otherwise forwarded to ``self.parse_lyrics``.
        *data: Extra arguments passed through unchanged.
    """
    if songlist is None:
        callback(None, *data)
        return

    songlist = songlist.decode('iso-8859-1')

    # The song links follow the 'LYRICS</h1>' heading.  A page without the
    # heading (error page, changed layout) is reported as "not found"
    # instead of raising IndexError and dropping the callback.
    sections = songlist.split('LYRICS</h1>')
    if len(sections) < 2:
        callback(None, *data)
        return
    link_section = sections[1].lower()

    # Groups: artist directory, album file, anchor after '#', link text.
    pattern_song = '<a href="../lyrics/(.*)/(.*).html#([^"]+)">(.*)</a>'
    matches = re.findall(pattern_song, link_section)

    # Titles are compared lower-cased and without spaces; link_section is
    # already lower-case, so only the wanted title needs lowering.
    wanted_title = self.title.lower().replace(' ', '')

    best_match = None
    for artist, album, number, title in matches:
        smvalue = stringmatch.string_match(title.replace(' ', ''),
                                           wanted_title)
        if smvalue > min_song_match:
            best_match = self.SongFound(smvalue, title, number, album, artist)
            break

    if not best_match:
        callback(None, *data)
        return

    loader = rb.Loader()
    url = 'http://www.darklyrics.com/lyrics/%s/%s.html' % (
        best_match.artist, best_match.album)
    self.title = best_match.title
    self.titlenumber = best_match.number
    loader.get_url(url, self.parse_lyrics, callback, *data)
def search_song(self, songlist, callback, *data):
    """Find the song matching ``self.title`` on a DarkLyrics artist page.

    The artist page lists all albums with links to each song's lyrics.  The
    page is decoded as ISO-8859-1 and the section after the ``LYRICS</h1>``
    heading is lower-cased and scanned for song links.  Each link title is
    compared (spaces removed) to ``self.title`` with
    ``stringmatch.string_match``.

    The FIRST title whose score exceeds ``min_song_match`` (a threshold
    defined outside this method) is taken; later links are not examined.
    On success ``self.title`` and ``self.titlenumber`` are updated from the
    match and the album page URL is handed to ``rb.Loader().get_url`` with
    ``self.parse_lyrics`` as the next handler.

    Args:
        songlist: Raw artist page contents, or None when no page is
            available.
        callback: Called as ``callback(None, *data)`` when no song can be
            found; otherwise forwarded to ``self.parse_lyrics``.
        *data: Extra arguments passed through unchanged.
    """
    if songlist is None:
        callback(None, *data)
        return

    songlist = songlist.decode('iso-8859-1')

    # The song links follow the 'LYRICS</h1>' heading.  A page without the
    # heading (error page, changed layout) is reported as "not found"
    # instead of raising IndexError and dropping the callback.
    sections = songlist.split('LYRICS</h1>')
    if len(sections) < 2:
        callback(None, *data)
        return
    link_section = sections[1].lower()

    # Groups: artist directory, album file, anchor after '#', link text.
    pattern_song = '<a href="../lyrics/(.*)/(.*).html#([^"]+)">(.*)</a>'
    matches = re.findall(pattern_song, link_section)

    # Titles are compared lower-cased and without spaces; link_section is
    # already lower-case, so only the wanted title needs lowering.
    wanted_title = self.title.lower().replace(' ', '')

    best_match = None
    for artist, album, number, title in matches:
        smvalue = stringmatch.string_match(title.replace(' ', ''),
                                           wanted_title)
        if smvalue > min_song_match:
            best_match = self.SongFound(smvalue, title, number, album, artist)
            break

    if not best_match:
        callback(None, *data)
        return

    loader = rb.Loader()
    url = 'http://www.darklyrics.com/lyrics/%s/%s.html' % (
        best_match.artist, best_match.album)
    self.title = best_match.title
    self.titlenumber = best_match.number
    loader.get_url(url, self.parse_lyrics, callback, *data)