Exemple #1
0
	def got_results (self, result, callback, *data):
		"""Pick the first acceptable hit from a lyrics search results page.

		result is the raw HTML of the search page, or None when the fetch
		failed.  Each result row is compared against self.title and
		self.artist with string_match; for the first row whose title and
		artist strengths exceed title_match and artist_match, the row's
		lyrics page is requested with self.parse_lyrics as the handler
		(callback and data are passed through to it).

		In every other case callback (None, *data) is invoked.  Rows that
		lack the expected link or artist markup are skipped instead of
		raising, so the callback is always reached.
		"""
		if result is None:
			callback (None, *data)
			return

		# Flatten to a single line: '.' in the patterns below does not
		# match across line breaks.
		results = result.replace('\r', '').replace('\n', '')

		found = re.search(r'(<tr><td bgcolor="#BBBBBB".*)(More Songs &gt)', results)
		if found is not None:
			# Group 1 starts with the row marker, so the first piece of the
			# split is always empty and is dropped.
			entries = found.group(1).split('<tr><td bgcolor="#BBBBBB"')[1:]
			print("found %d entries; looking for [%s,%s]" % (len(entries), self.title, self.artist))
			for entry in entries:
				url_found = re.search(r'(/display[^"]*)', entry)
				artist_found = re.search(r'(Artist:.*html">)([^<]*)', entry)
				title_found = re.search(r'(/display[^>]*)([^<]*)', entry)
				if url_found is None or artist_found is None or title_found is None:
					# Unexpected row layout: ignore it and keep looking.
					continue

				url = url_found.group(1)
				artist = artist_found.group(2).strip()
				# Group 2 begins with the '>' closing the anchor tag; drop it.
				title = title_found.group(2)[1:].strip()

				if self.artist != "":
					artist_str = string_match(self.artist, artist)
				else:
					# No artist to compare against: let any artist pass.
					artist_str = artist_match + 0.1

				title_str = string_match(self.title, title)

				print("checking [%s,%s]: match strengths [%f,%f]" % (title, artist, title_str, artist_str))
				if title_str > title_match and artist_str > artist_match:
					loader = rb.Loader()
					loader.get_url ('http://display.lyrics.astraweb.com' + url, self.parse_lyrics, callback, *data)
					return

		callback (None, *data)
		return
Exemple #2
0
	def search_artist(self, artist_page, callback, *data):
		"""Search for the link to the page of artist in artist_page.

		artist_page is the HTML of an artist index page, or None when the
		fetch failed.  Every link after the banner script tag is compared
		with self.artist_ascii using string_match; links whose target
		starts with 'http:' are skipped.  The link with the highest
		strength above min_artist_match is followed, with
		self.search_song as the handler, and self.artist is replaced by
		the (lower-cased) name found on the page.

		callback (None, *data) is invoked when the page is missing, the
		banner marker is absent, or no link is a good enough match.
		"""
		if artist_page is None:
			callback (None, *data)
			return

		sections = re.split ('<SCRIPT LANGUAGE="javascript" src="tban2.js"></SCRIPT>', artist_page, 1)
		if len(sections) < 2:
			# Marker not found, so there is no link section to inspect.
			callback (None, *data)
			return

		pattern_link =  '<a href="'
		pattern_artist = '([^"]*)">*([^<]*)</a><br><br>'
		# The first piece is the text before the first link; drop it.
		links = sections[1].lower().split (pattern_link)[1:]
		best_match = ()
		for line in links:
			found = re.search (pattern_artist, line)
			if found is None:
				continue
			artist_link, artist_name = found.groups ()
			if artist_link[:5] == 'http:':
				# Lyrics are located in external site
				continue
			artist_name = artist_name.strip()
			smvalue = string_match (artist_name, self.artist_ascii)
			# Keep the strongest candidate, not merely the last acceptable one.
			if smvalue > min_artist_match and (not best_match or smvalue > best_match[0]):
				artist_url = 'http://www.darklyrics.com/%s' % (artist_link)
				best_match = (smvalue, artist_url, artist_name)

		if not best_match:
			callback (None, *data)
			return
		loader = rb.Loader ()
		self.artist  = best_match[2]
		loader.get_url (best_match[1], self.search_song, callback, *data)
    def got_lyrics(self, lyrics, callback, *data):
        """Handle the XML search response and fetch the first good match.

        lyrics is the XML text of the search response, or None when the
        fetch failed.  The response is accepted only if its "response"
        element carries code '0'.  Each "result" under "searchResults" is
        compared with self.title and self.artist using string_match; for
        the first one whose strengths exceed title_match and artist_match,
        the lyrics are requested with self.parse_lyrics as the handler.

        callback(None, *data) is invoked when there is no response text,
        the response is not successful, or no result is acceptable.
        """
        if lyrics is None:
            callback(None, *data)
            return

        element = cElementTree.fromstring(lyrics)
        response = element.find("response")
        # Compare by value: identity of two equal strings is not guaranteed.
        if response is None or response.attrib.get('code') != '0':
            print("got failed response:" + lyrics)
            callback(None, *data)
            return

        match = None
        results_node = element.find("searchResults")
        # Test against None explicitly; an element's truth value is not a
        # reliable "was it found" check.
        if results_node is not None:
            matches = results_node.findall("result")
        else:
            matches = []
        print("got %d result(s)" % (len(matches)))
        for m in matches:
            matchtitle = m.findtext("title")
            matchartist = m.findtext("artist/name")

            # if we don't know the artist, then anyone will do
            if self.artist != "":
                artist_str = string_match(self.artist, matchartist)
            else:
                artist_str = artist_match + 0.1

            title_str = string_match(self.title, matchtitle)
            if artist_str > artist_match and title_str > title_match:
                print("found acceptable match, artist: %s (%f), title: %s (%f)" % (
                    matchartist, artist_str, matchtitle, title_str))
                match = m
                break
            else:
                print("skipping match, artist: %s (%f), title: %s (%f)" % (
                    matchartist, artist_str, matchtitle, title_str))

        if match is not None:
            # Read the id from the accepted result, not the loop variable.
            hid = match.attrib['hid'].encode('utf-8')
            lurl = "http://api.leoslyrics.com/api_lyrics.php?auth=Rhythmbox&hid=%s" % (
                urllib.quote(hid))
            loader = rb.Loader()
            loader.get_url(lurl, self.parse_lyrics, callback, *data)
        else:
            print("no acceptable match found")
            callback(None, *data)
	def got_lyrics (self, lyrics, callback, *data):
		"""Handle the XML search response and fetch the first good match.

		lyrics is the XML text of the search response, or None when the
		fetch failed.  The response is accepted only if its "response"
		element carries code '0'.  Each "result" under "searchResults" is
		compared with self.title and self.artist using string_match; for
		the first one whose strengths exceed title_match and artist_match,
		the lyrics are requested with self.parse_lyrics as the handler.

		callback (None, *data) is invoked when there is no response text,
		the response is not successful, or no result is acceptable.
		"""
		if lyrics is None:
			callback (None, *data)
			return

		element = cElementTree.fromstring(lyrics)
		response = element.find("response")
		# Compare by value: identity of two equal strings is not guaranteed.
		if response is None or response.attrib.get('code') != '0':
			print("got failed response:" + lyrics)
			callback (None, *data)
			return

		match = None
		results_node = element.find("searchResults")
		# Test against None explicitly; an element's truth value is not a
		# reliable "was it found" check.
		if results_node is not None:
			matches = results_node.findall("result")
		else:
			matches = []
		print("got %d result(s)" % (len(matches)))
		for m in matches:
			matchtitle = m.findtext("title")
			matchartist = m.findtext("artist/name")

			# if we don't know the artist, then anyone will do
			if self.artist != "":
				artist_str = string_match(self.artist, matchartist)
			else:
				artist_str = artist_match + 0.1

			title_str = string_match(self.title, matchtitle)
			if artist_str > artist_match and title_str > title_match:
				print("found acceptable match, artist: %s (%f), title: %s (%f)" % (matchartist, artist_str, matchtitle, title_str))
				match = m
				break
			else:
				print("skipping match, artist: %s (%f), title: %s (%f)" % (matchartist, artist_str, matchtitle, title_str))

		if match is not None:
			# Read the id from the accepted result, not the loop variable.
			hid = match.attrib['hid'].encode('utf-8')
			lurl = "http://api.leoslyrics.com/api_lyrics.php?auth=Rhythmbox&hid=%s" % (urllib.quote(hid))
			loader = rb.Loader()
			loader.get_url (lurl, self.parse_lyrics, callback, *data)
		else:
			print("no acceptable match found")
			callback (None, *data)
Exemple #5
0
	def search_song (self, songlist, callback, *data):
		"""If artist's page is found, search_song looks for the song.

		The artist page contains a list of all the albums and
		links to the songs lyrics from this.

		songlist is the HTML of the artist page, or None when the fetch
		failed.  Every song link in the section after the lyrics heading
		is compared with self.title (both lower-cased, spaces removed)
		using string_match.  The strongest link above min_song_match is
		followed with self.parse_lyrics as the handler; self.title and
		self.titlenumber are updated from that link.

		callback (None, *data) is invoked when the page is missing, the
		heading is absent, or no song is a good enough match.
		"""
		if songlist is None:
			callback (None, *data)
			return

		sections = re.split('LYRICS<BR></FONT>', songlist)
		if len(sections) < 2:
			# Heading not found, so there is no song list to inspect.
			callback (None, *data)
			return
		# The page is lower-cased, so the pattern is lower-cased to match it.
		link_section = sections[1].lower()
		pattern_song = '<a href="../lyrics/(.*)/(.*).html#([^"]+)" target="_blank"><FONT COLOR="#CCCCCC">(.*)</FONT></a><br>'
		matches = re.findall (pattern_song.lower(), link_section)

		wanted_title = self.title.lower().replace(' ', '')
		best_match = None
		best_value = None
		for artist, album, number, title in matches:
			smvalue = string_match (title.lower().replace(' ', ''), wanted_title)
			# Keep the strongest candidate, not merely the last acceptable one.
			if smvalue > min_song_match and (best_value is None or smvalue > best_value):
				best_value = smvalue
				best_match = self.SongFound(smvalue, title, number, album, artist)

		if best_match is None:
			callback (None, *data)
			return
		loader = rb.Loader ()
		url = 'http://www.darklyrics.com/lyrics/%s/%s.html' % (best_match.artist, best_match.album)
		self.title = best_match.title
		self.titlenumber = best_match.number
		loader.get_url (url, self.parse_lyrics, callback, *data)
	def get_best_match_urls (self, search_results):
		"""Return the image URLs of the strongest item in search_results.

		Result sets without an "Item" attribute are skipped.  Each item
		accepted by self.__valid_match is scored against self.search_album
		and self.search_artist with string_match; items scoring below
		REJECT_MATCH on album or artist are rejected.  When the top
		combined score exceeds MINIMUM_MATCH, the URLs of that item's
		"LargeImage" and "MediumImage" attributes (those that exist, in
		that order) are returned.  Otherwise an empty list is returned.
		"""
		winner = None
		winner_score = 0.0

		print("attempting to find best search result from %d sets" % len(search_results))

		for result_set in search_results:
			if not hasattr (result_set, "Item"):
				# Search was unsuccessful, try next batch job
				continue

			candidates = [candidate for candidate in result_set.Item if self.__valid_match (candidate)]
			print("%d valid results in this set" % len(candidates))
			for item in candidates:

				# album strength; stays at the default when no title is reported
				item_album = str(getattr(item.ItemAttributes, "Title", ""))
				if item_album == "":
					album_score = DEFAULT_MATCH
				else:
					album_score = string_match (self.search_album, item_album)

				# artist strength: the best over all artist names reported
				item_artists = getattr(item.ItemAttributes, "Artist", [])
				if len(item_artists) == 0:
					artist_score = DEFAULT_MATCH
				else:
					artist_score = 0.0
					for name in item_artists:
						# 'Various Artists' is special-cased to the default strength
						if name == "Various Artists":
							strength = DEFAULT_MATCH
						else:
							strength = string_match (self.search_artist, name)
						artist_score = max (artist_score, strength)

				# combine the two strengths; with only one search term,
				# only the corresponding strength counts
				if self.search_album == "":
					combined = artist_score
				elif self.search_artist == "":
					combined = album_score
				else:
					# plain average of the two strengths
					combined = (artist_score + album_score) / 2

				# anything below the floor on either axis is thrown out
				if album_score < REJECT_MATCH or artist_score < REJECT_MATCH:
					verdict = "rejected"
				elif combined > winner_score:
					winner = item
					winner_score = combined
					verdict = "best"
				else:
					verdict = "discarded"

				print("search result: \"%s\" (%f) by \"%s\" (%f): %f, %s" % (item_album, album_score, str(item_artists), artist_score, combined, verdict))


		print("best result has match strength %f" % winner_score)
		if winner_score > MINIMUM_MATCH:
			urls = []
			for size in ("LargeImage", "MediumImage"):
				if hasattr (winner, size):
					urls.append (getattr (winner, size).URL)
			if urls:
				print("got urls: %s" % urls)
				return urls

		# No search was successful
		print("no search results to return")
		return []
    def get_best_match_urls(self, search_results):
        """Return the image URLs of the strongest item in search_results.

        Result sets without an "Item" attribute are skipped.  Each item
        accepted by self.__valid_match is scored against self.search_album
        and self.search_artist with string_match; items scoring below
        REJECT_MATCH on album or artist are rejected.  When the top
        combined score exceeds MINIMUM_MATCH, the URLs of that item's
        "LargeImage" and "MediumImage" attributes (those that exist, in
        that order) are returned.  Otherwise an empty list is returned.
        """
        winner = None
        winner_score = 0.0

        print("attempting to find best search result from %d sets" % len(
            search_results))

        for result_set in search_results:
            if not hasattr(result_set, "Item"):
                # Search was unsuccessful, try next batch job
                continue

            candidates = [
                candidate for candidate in result_set.Item
                if self.__valid_match(candidate)
            ]
            print("%d valid results in this set" % len(candidates))
            for item in candidates:

                # album strength; stays at the default when no title is reported
                item_album = str(getattr(item.ItemAttributes, "Title", ""))
                if item_album == "":
                    album_score = DEFAULT_MATCH
                else:
                    album_score = string_match(self.search_album, item_album)

                # artist strength: the best over all artist names reported
                item_artists = getattr(item.ItemAttributes, "Artist", [])
                if len(item_artists) == 0:
                    artist_score = DEFAULT_MATCH
                else:
                    artist_score = 0.0
                    for name in item_artists:
                        # 'Various Artists' is special-cased to the default strength
                        if name == "Various Artists":
                            strength = DEFAULT_MATCH
                        else:
                            strength = string_match(self.search_artist, name)
                        artist_score = max(artist_score, strength)

                # combine the two strengths; with only one search term,
                # only the corresponding strength counts
                if self.search_album == "":
                    combined = artist_score
                elif self.search_artist == "":
                    combined = album_score
                else:
                    # plain average of the two strengths
                    combined = (artist_score + album_score) / 2

                # anything below the floor on either axis is thrown out
                if album_score < REJECT_MATCH or artist_score < REJECT_MATCH:
                    verdict = "rejected"
                elif combined > winner_score:
                    winner = item
                    winner_score = combined
                    verdict = "best"
                else:
                    verdict = "discarded"

                print("search result: \"%s\" (%f) by \"%s\" (%f): %f, %s" % (
                    item_album, album_score, str(item_artists), artist_score,
                    combined, verdict))

        print("best result has match strength %f" % winner_score)
        if winner_score > MINIMUM_MATCH:
            urls = []
            for size in ("LargeImage", "MediumImage"):
                if hasattr(winner, size):
                    urls.append(getattr(winner, size).URL)
            if urls:
                print("got urls: %s" % urls)
                return urls

        # No search was successful
        print("no search results to return")
        return []