Python string_matchの例、rb.string_match Pythonの例

コード例 #1

0

ファイルを表示

	def got_results (self, result, callback, *data):
		"""Parse an Astraweb search-results page and request the first good hit.

		result   -- raw page body as bytes, or None when nothing was fetched.
		callback -- called as callback(None, *data) when no usable entry is
		            found; otherwise it is passed on, together with data, to
		            the loader that fetches the lyrics page for
		            self.parse_lyrics.
		data     -- extra positional arguments forwarded untouched.

		An entry is accepted when its title and artist match strengths (from
		rb.string_match) both exceed the title_match / artist_match
		thresholds, which are defined outside this method.
		"""
		if result is None:
			callback(None, *data)
			return

		# no indication of anything else..
		page = result.decode('iso-8859-1')
		page = page.replace('\r', '').replace('\n', '')

		# A single search replaces the former search + split pair; group(1)
		# is exactly what re.split(...)[1] used to return.
		table = re.search(r'(<tr><td bgcolor="#BBBBBB".*)(More Songs &gt)', page)
		if table is not None:
			# The text before the first row marker is not an entry.
			entries = table.group(1).split('<tr><td bgcolor="#BBBBBB"')[1:]
			print("found %d entries; looking for [%s,%s]" % (len(entries), self.title, self.artist))
			for entry in entries:
				url_m = re.search(r'(/display[^"]*)', entry)
				artist_m = re.search(r'(Artist:.*html">)([^<]*)', entry)
				title_m = re.search(r'(/display[^>]*)([^<]*)', entry)
				if url_m is None or artist_m is None or title_m is None:
					# Malformed row: skip it instead of raising IndexError,
					# so the callback is still invoked below.
					continue

				url = url_m.group(1)
				artist = artist_m.group(2).strip()
				# group(2) starts with the '>' that closes the link tag.
				title = title_m.group(2)[1:].strip()

				if self.artist != "":
					artist_str = rb.string_match(self.artist, artist)
				else:
					# Unknown artist: make the artist test pass for any entry.
					artist_str = artist_match + 0.1

				title_str = rb.string_match(self.title, title)

				print("checking [%s,%s]: match strengths [%f,%f]" % (title, artist, title_str, artist_str))
				if title_str > title_match and artist_str > artist_match:
					loader = rb.Loader()
					loader.get_url('http://display.lyrics.astraweb.com' + url, self.parse_lyrics, callback, *data)
					return

		callback(None, *data)
		return

コード例 #2

0

ファイルを表示

ファイル: AstrawebParser.py プロジェクト: bilboed/rhythmbox

	def got_results (self, result, callback, *data):
		"""Parse an Astraweb search-results page and request the first good hit.

		result   -- page body as a string, or None when nothing was fetched.
		callback -- called as callback(None, *data) when no usable entry is
		            found; otherwise it is passed on, together with data, to
		            the loader that fetches the lyrics page for
		            self.parse_lyrics.
		data     -- extra positional arguments forwarded untouched.

		An entry is accepted when its title and artist match strengths (from
		rb.string_match) both exceed the title_match / artist_match
		thresholds, which are defined outside this method.
		"""
		if result is None:
			callback(None, *data)
			return

		page = result.replace('\r', '').replace('\n', '')

		# A single search replaces the former search + split pair; group(1)
		# is exactly what re.split(...)[1] used to return.
		table = re.search(r'(<tr><td bgcolor="#BBBBBB".*)(More Songs &gt)', page)
		if table is not None:
			# The text before the first row marker is not an entry.
			entries = table.group(1).split('<tr><td bgcolor="#BBBBBB"')[1:]
			# Single-argument call form prints the same text on Python 2 and 3.
			print("found %d entries; looking for [%s,%s]" % (len(entries), self.title, self.artist))
			for entry in entries:
				url_m = re.search(r'(/display[^"]*)', entry)
				artist_m = re.search(r'(Artist:.*html">)([^<]*)', entry)
				title_m = re.search(r'(/display[^>]*)([^<]*)', entry)
				if url_m is None or artist_m is None or title_m is None:
					# Malformed row: skip it instead of raising IndexError,
					# so the callback is still invoked below.
					continue

				url = url_m.group(1)
				artist = artist_m.group(2).strip()
				# group(2) starts with the '>' that closes the link tag.
				title = title_m.group(2)[1:].strip()

				if self.artist != "":
					artist_str = rb.string_match(self.artist, artist)
				else:
					# Unknown artist: make the artist test pass for any entry.
					artist_str = artist_match + 0.1

				title_str = rb.string_match(self.title, title)

				print("checking [%s,%s]: match strengths [%f,%f]" % (title, artist, title_str, artist_str))
				if title_str > title_match and artist_str > artist_match:
					loader = rb.Loader()
					loader.get_url('http://display.lyrics.astraweb.com' + url, self.parse_lyrics, callback, *data)
					return

		callback(None, *data)
		return

コード例 #3

0

ファイルを表示

    def search_artist(self, artist_page, callback, *data):
        """Find the artist's page in a DarkLyrics artist index and load it.

        artist_page -- text of the index page, or None when nothing was
                       fetched.
        callback    -- called as callback(None, *data) when no suitable
                       artist link is found; otherwise it is forwarded,
                       together with data, to self.search_song via the
                       loader.
        data        -- extra positional arguments forwarded untouched.

        Every site-local link is compared to self.artist_ascii with
        rb.string_match; the candidate with the highest value above
        min_artist_match (defined outside this method) is selected and its
        name is stored in self.artist.
        """
        if artist_page is None:
            callback(None, *data)
            return

        sections = re.split(
            '<SCRIPT LANGUAGE="javascript" src="tban2.js"></SCRIPT>',
            artist_page, 1)
        if len(sections) < 2:
            # Banner marker missing (unexpected page): report "not found"
            # rather than raising IndexError and never calling back.
            callback(None, *data)
            return

        pattern_artist = '([^"]*)">*([^<]*)</a><br><br>'
        # The text before the first link opener is not a link.
        candidates = sections[1].lower().split('<a href="')[1:]
        best_match = ()
        for line in candidates:
            found = re.search(pattern_artist, line)
            if found is None:
                continue
            artist_link, artist_name = found.groups()
            if artist_link.startswith('http:'):
                # Absolute link: points outside the site, ignore it.
                continue
            artist_name = artist_name.strip()
            smvalue = rb.string_match(artist_name, self.artist_ascii)
            # Keep the strongest candidate, not merely the last one that
            # clears the threshold.
            if smvalue > min_artist_match and (
                    not best_match or smvalue > best_match[0]):
                artist_url = 'http://www.darklyrics.com/%s' % (artist_link)
                best_match = (smvalue, artist_url, artist_name)

        if not best_match:
            # Lyrics are located in external site
            callback(None, *data)
            return
        loader = rb.Loader()
        self.artist = best_match[2]
        loader.get_url(best_match[1], self.search_song, callback, *data)

コード例 #4

0

ファイルを表示

    def search_song(self, songlist, callback, *data):
        """Pick the best-matching song on an artist page and load its lyrics.

        The artist page contains a list of all the albums and links to the
        song lyrics.

        songlist -- text of the artist page, or None when nothing was
                    fetched.
        callback -- called as callback(None, *data) when no suitable song
                    link is found; otherwise it is forwarded, together with
                    data, to self.parse_lyrics via the loader.
        data     -- extra positional arguments forwarded untouched.

        Each song link title is compared to self.title (lower-cased, spaces
        removed) with rb.string_match. The link with the highest value above
        min_song_match (defined outside this method) wins; self.title and
        self.titlenumber are updated from it.
        """
        if songlist is None:
            callback(None, *data)
            return

        sections = re.split('LYRICS<BR></FONT>', songlist)
        if len(sections) < 2:
            # Section marker missing (unexpected page): report "not found"
            # rather than raising IndexError and never calling back.
            callback(None, *data)
            return
        link_section = sections[1].lower()

        pattern_song = '<a href="../lyrics/(.*)/(.*).html#([^"]+)" target="_blank"><FONT COLOR="#CCCCCC">(.*)</FONT></a><br>'
        # The page text was lower-cased, so the pattern must be as well.
        matches = re.findall(pattern_song.lower(), link_section)

        wanted = self.title.lower().replace(' ', '')
        best_match = None
        best_value = None
        for artist, album, number, title in matches:
            smvalue = rb.string_match(title.lower().replace(' ', ''), wanted)
            # Keep the strongest candidate, not merely the last one that
            # clears the threshold.
            if smvalue > min_song_match and (
                    best_value is None or smvalue > best_value):
                best_value = smvalue
                best_match = self.SongFound(smvalue, title, number, album,
                                            artist)
        if not best_match:
            callback(None, *data)
            return
        loader = rb.Loader()
        url = 'http://www.darklyrics.com/lyrics/%s/%s.html' % (
            best_match.artist, best_match.album)
        self.title = best_match.title
        self.titlenumber = best_match.number
        loader.get_url(url, self.parse_lyrics, callback, *data)

コード例 #5

0

ファイルを表示

ファイル: DarkLyricsParser.py プロジェクト: bilboed/rhythmbox

	def search_artist(self, artist_page, callback, *data):
		"""Find the artist's page in a DarkLyrics artist index and load it.

		artist_page -- text of the index page, or None when nothing was
		               fetched.
		callback    -- called as callback(None, *data) when no suitable
		               artist link is found; otherwise it is forwarded,
		               together with data, to self.search_song via the
		               loader.
		data        -- extra positional arguments forwarded untouched.

		Every site-local link is compared to self.artist_ascii with
		rb.string_match; the candidate with the highest value above
		min_artist_match (defined outside this method) is selected and its
		name is stored in self.artist.
		"""
		if artist_page is None:
			callback(None, *data)
			return

		sections = re.split('<SCRIPT LANGUAGE="javascript" src="tban2.js"></SCRIPT>',
				    artist_page, 1)
		if len(sections) < 2:
			# Banner marker missing (unexpected page): report "not found"
			# rather than raising IndexError and never calling back.
			callback(None, *data)
			return

		pattern_artist = '([^"]*)">*([^<]*)</a><br><br>'
		# The text before the first link opener is not a link.
		candidates = sections[1].lower().split('<a href="')[1:]
		best_match = ()
		for line in candidates:
			found = re.search(pattern_artist, line)
			if found is None:
				continue
			artist_link, artist_name = found.groups()
			if artist_link.startswith('http:'):
				# Absolute link: points outside the site, ignore it.
				continue
			artist_name = artist_name.strip()
			smvalue = rb.string_match(artist_name, self.artist_ascii)
			# Keep the strongest candidate, not merely the last one that
			# clears the threshold.
			if smvalue > min_artist_match and (not best_match or smvalue > best_match[0]):
				artist_url = 'http://www.darklyrics.com/%s' % (artist_link)
				best_match = (smvalue, artist_url, artist_name)

		if not best_match:
			# Lyrics are located in external site
			callback(None, *data)
			return
		loader = rb.Loader()
		self.artist = best_match[2]
		loader.get_url(best_match[1], self.search_song, callback, *data)

コード例 #6

0

ファイルを表示

	def got_lyrics (self, lyrics, callback, *data):
		"""Parse a leoslyrics.com search response and fetch the first good hit.

		lyrics   -- XML text of the search response, or None when nothing
		            was fetched.
		callback -- called as callback(None, *data) when the response reports
		            failure or holds no acceptable result; otherwise it is
		            forwarded, together with data, to self.parse_lyrics via
		            the loader.
		data     -- extra positional arguments forwarded untouched.

		A result is acceptable when its artist and title match strengths
		(from rb.string_match) both exceed the artist_match / title_match
		thresholds, which are defined outside this method.
		"""
		if lyrics is None:
			callback(None, *data)
			return

		element = cElementTree.fromstring(lyrics)
		response = element.find("response")
		# Compare by value: identity ("is not") on strings only works by
		# accident of interning. A missing element/attribute is a failure.
		if response is None or response.attrib.get('code') != '0':
			# Single-argument call form prints the same text on Python 2 and 3.
			print("got failed response:" + lyrics)
			callback(None, *data)
			return

		match = None
		results = element.find("searchResults")
		matches = results.findall("result") if results is not None else []
		print("got %d result(s)" % (len(matches)))
		for m in matches:
			matchtitle = m.findtext("title")
			matchartist = m.findtext("artist/name")

			# if we don't know the artist, then anyone will do
			if self.artist != "":
				artist_str = rb.string_match(self.artist, matchartist)
			else:
				artist_str = artist_match + 0.1

			title_str = rb.string_match(self.title, matchtitle)
			if artist_str > artist_match and title_str > title_match:
				print("found acceptable match, artist: %s (%f), title: %s (%f)" % (matchartist, artist_str, matchtitle, title_str))
				match = m
				break
			else:
				print("skipping match, artist: %s (%f), title: %s (%f)" % (matchartist, artist_str, matchtitle, title_str))

		if match is not None:
			# Use the accepted result explicitly, not the leftover loop variable.
			hid = match.attrib['hid'].encode('utf-8')
			lurl = "http://api.leoslyrics.com/api_lyrics.php?auth=Rhythmbox&hid=%s" % (urllib.quote(hid))
			loader = rb.Loader()
			loader.get_url(lurl, self.parse_lyrics, callback, *data)
		else:
			print("no acceptable match found")
			callback(None, *data)

コード例 #7

0

ファイルを表示

ファイル: DarkLyricsParser.py プロジェクト: bilboed/rhythmbox

	def search_song (self, songlist, callback, *data):
		"""Pick the best-matching song on an artist page and load its lyrics.

		The artist page contains a list of all the albums and links to the
		song lyrics.

		songlist -- text of the artist page, or None when nothing was
		            fetched.
		callback -- called as callback(None, *data) when no suitable song
		            link is found; otherwise it is forwarded, together with
		            data, to self.parse_lyrics via the loader.
		data     -- extra positional arguments forwarded untouched.

		Each song link title is compared to self.title (lower-cased, spaces
		removed) with rb.string_match. The link with the highest value above
		min_song_match (defined outside this method) wins; self.title and
		self.titlenumber are updated from it.
		"""
		if songlist is None:
			callback(None, *data)
			return

		sections = re.split('LYRICS<BR></FONT>', songlist)
		if len(sections) < 2:
			# Section marker missing (unexpected page): report "not found"
			# rather than raising IndexError and never calling back.
			callback(None, *data)
			return
		link_section = sections[1].lower()

		pattern_song = '<a href="../lyrics/(.*)/(.*).html#([^"]+)" target="_blank"><FONT COLOR="#CCCCCC">(.*)</FONT></a><br>'
		# The page text was lower-cased, so the pattern must be as well.
		matches = re.findall(pattern_song.lower(), link_section)

		wanted = self.title.lower().replace(' ', '')
		best_match = None
		best_value = None
		for artist, album, number, title in matches:
			smvalue = rb.string_match(title.lower().replace(' ', ''), wanted)
			# Keep the strongest candidate, not merely the last one that
			# clears the threshold.
			if smvalue > min_song_match and (best_value is None or smvalue > best_value):
				best_value = smvalue
				best_match = self.SongFound(smvalue,
							    title,
							    number,
							    album,
							    artist)
		if not best_match:
			callback(None, *data)
			return
		loader = rb.Loader()
		url = 'http://www.darklyrics.com/lyrics/%s/%s.html' % (best_match.artist, best_match.album)
		self.title = best_match.title
		self.titlenumber = best_match.number
		loader.get_url(url, self.parse_lyrics, callback, *data)