コード例 #1
0
ファイル: DarkLyricsParser.py プロジェクト: GNOME/rhythmbox
	def search_artist(self, artist_page, callback, *data):
		"""Find the best-matching artist link in the artist index page.

		Arguments:
		artist_page -- raw page content (decoded here as ISO-8859-1), or
			None, which is treated as a failed lookup
		callback -- invoked as callback(None, *data) when no usable
			artist link is found
		data -- extra arguments forwarded unchanged to callback

		On success the matched name is stored in self.artist and the
		artist's page is requested with self.search_song as its handler.
		"""
		if artist_page is None:
			callback(None, *data)
			return
		artist_page = artist_page.decode('iso-8859-1')

		# Only the part of the page after the 'tban.js' marker is scanned
		# for artist links. If the marker is missing (unexpected layout,
		# error page, ...) report "not found" instead of raising, so the
		# callback is always invoked.
		_, marker, link_section = artist_page.partition('tban.js')
		if not marker:
			callback(None, *data)
			return

		# Every chunk after the first begins right behind an '<a href="'.
		chunks = link_section.lower().split('<a href="')[1:]
		# Captures (link target, link text) at the start of a chunk.
		link_re = re.compile('([^"]*)">*([^<]*)<')

		best_url = None
		best_name = None
		best_score = 0
		for chunk in chunks:
			found = link_re.search(chunk)
			if found is None:
				continue
			artist_link, artist_name = found.groups()
			# Absolute links point to external sites; skip them.
			if artist_link.startswith('http:'):
				continue
			artist_name = artist_name.strip()
			score = stringmatch.string_match(artist_name, self.artist_ascii)
			# Keep the highest score that also clears the threshold.
			if score > min_artist_match and score > best_score:
				best_score = score
				best_url = 'http://www.darklyrics.com/%s' % artist_link
				best_name = artist_name

		if best_url is None:
			# No link on this site matched the artist well enough.
			callback(None, *data)
			return
		self.artist = best_name
		loader = rb.Loader()
		loader.get_url(best_url, self.search_song, callback, *data)
コード例 #2
0
    def search_artist(self, artist_page, callback, *data):
        """Find the best-matching artist link in the artist index page.

        Arguments:
        artist_page -- raw page content (decoded here as ISO-8859-1), or
            None, which is treated as a failed lookup
        callback -- invoked as callback(None, *data) when no usable
            artist link is found
        data -- extra arguments forwarded unchanged to callback

        On success the matched name is stored in self.artist and the
        artist's page is requested with self.search_song as its handler.
        """
        if artist_page is None:
            callback(None, *data)
            return
        artist_page = artist_page.decode('iso-8859-1')

        # Only the part of the page after the 'tban.js' marker is scanned
        # for artist links. If the marker is missing (unexpected layout,
        # error page, ...) report "not found" instead of raising, so the
        # callback is always invoked.
        _, marker, link_section = artist_page.partition('tban.js')
        if not marker:
            callback(None, *data)
            return

        # Every chunk after the first begins right behind an '<a href="'.
        chunks = link_section.lower().split('<a href="')[1:]
        # Captures (link target, link text) at the start of a chunk.
        link_re = re.compile('([^"]*)">*([^<]*)<')

        best_url = None
        best_name = None
        best_score = 0
        for chunk in chunks:
            found = link_re.search(chunk)
            if found is None:
                continue
            artist_link, artist_name = found.groups()
            # Absolute links point to external sites; skip them.
            if artist_link.startswith('http:'):
                continue
            artist_name = artist_name.strip()
            score = stringmatch.string_match(artist_name, self.artist_ascii)
            # Keep the highest score that also clears the threshold.
            if score > min_artist_match and score > best_score:
                best_score = score
                best_url = 'http://www.darklyrics.com/%s' % artist_link
                best_name = artist_name

        if best_url is None:
            # No link on this site matched the artist well enough.
            callback(None, *data)
            return
        self.artist = best_name
        loader = rb.Loader()
        loader.get_url(best_url, self.search_song, callback, *data)
コード例 #3
0
ファイル: DarkLyricsParser.py プロジェクト: GNOME/rhythmbox
	def search_song(self, songlist, callback, *data):
		"""Find the wanted song on the artist's page and fetch its lyrics.

		The artist page contains a list of all the albums, each with
		links to the lyrics of its songs.

		Arguments:
		songlist -- raw artist page content (decoded here as ISO-8859-1),
			or None, which is treated as a failed lookup
		callback -- invoked as callback(None, *data) when no song matches
		data -- extra arguments forwarded unchanged to callback

		On success self.title and self.titlenumber are updated from the
		matched link and the album's lyrics page is requested with
		self.parse_lyrics as its handler.
		"""
		if songlist is None:
			callback(None, *data)
			return
		songlist = songlist.decode('iso-8859-1')

		# Song links are looked up in the section that follows the first
		# 'LYRICS</h1>' heading. If the heading is missing (unexpected
		# layout, error page, ...) report "not found" instead of raising,
		# so the callback is always invoked.
		sections = songlist.split('LYRICS</h1>')
		if len(sections) < 2:
			callback(None, *data)
			return
		link_section = sections[1].lower()

		# Groups: artist directory, album file, anchor (song number),
		# link text (song title).
		pattern_song = '<a href="../lyrics/(.*)/(.*).html#([^"]+)">(.*)</a>'
		matches = re.findall(pattern_song, link_section)

		# Links are checked in page order and the first one whose title
		# scores above min_song_match is taken; there is no ranking.
		best_match = None
		for artist, album, number, title in matches:
			# title is already lower-case because link_section is.
			smvalue = stringmatch.string_match(
				title.replace(' ', ''),
				self.title.lower().replace(' ', ''))
			if smvalue > min_song_match:
				best_match = self.SongFound(smvalue, title, number,
							    album, artist)
				break

		if best_match is None:
			callback(None, *data)
			return
		url = 'http://www.darklyrics.com/lyrics/%s/%s.html' % (
			best_match.artist, best_match.album)
		self.title = best_match.title
		self.titlenumber = best_match.number
		loader = rb.Loader()
		loader.get_url(url, self.parse_lyrics, callback, *data)
コード例 #4
0
    def search_song(self, songlist, callback, *data):
        """Find the wanted song on the artist's page and fetch its lyrics.

        The artist page contains a list of all the albums, each with
        links to the lyrics of its songs.

        Arguments:
        songlist -- raw artist page content (decoded here as ISO-8859-1),
            or None, which is treated as a failed lookup
        callback -- invoked as callback(None, *data) when no song matches
        data -- extra arguments forwarded unchanged to callback

        On success self.title and self.titlenumber are updated from the
        matched link and the album's lyrics page is requested with
        self.parse_lyrics as its handler.
        """
        if songlist is None:
            callback(None, *data)
            return
        songlist = songlist.decode('iso-8859-1')

        # Song links are looked up in the section that follows the first
        # 'LYRICS</h1>' heading. If the heading is missing (unexpected
        # layout, error page, ...) report "not found" instead of raising,
        # so the callback is always invoked.
        sections = songlist.split('LYRICS</h1>')
        if len(sections) < 2:
            callback(None, *data)
            return
        link_section = sections[1].lower()

        # Groups: artist directory, album file, anchor (song number),
        # link text (song title).
        pattern_song = '<a href="../lyrics/(.*)/(.*).html#([^"]+)">(.*)</a>'
        matches = re.findall(pattern_song, link_section)

        # Links are checked in page order and the first one whose title
        # scores above min_song_match is taken; there is no ranking.
        best_match = None
        for artist, album, number, title in matches:
            # title is already lower-case because link_section is.
            smvalue = stringmatch.string_match(
                title.replace(' ', ''),
                self.title.lower().replace(' ', ''))
            if smvalue > min_song_match:
                best_match = self.SongFound(smvalue, title, number,
                                            album, artist)
                break

        if best_match is None:
            callback(None, *data)
            return
        url = 'http://www.darklyrics.com/lyrics/%s/%s.html' % (
            best_match.artist, best_match.album)
        self.title = best_match.title
        self.titlenumber = best_match.number
        loader = rb.Loader()
        loader.get_url(url, self.parse_lyrics, callback, *data)