def get_lyrics(self, lyrics):
    # Fetch lyrics by scraping the provider's search page, then
    # downloading the LRC file linked from the first result.
    utilities.log( debug, "%s: searching lyrics for %s - %s - %s" % (__title__, lyrics.artist, lyrics.album, lyrics.title))
    try:
        url = self.BASE_URL % (lyrics.title, lyrics.artist)
        utilities.log(debug, "%s: searching url %s" % (__title__, url))
        data = urllib.urlopen(url).read()
        # Pull the matched song/artist names and the LRC download token
        # out of the result HTML.
        songmatch = re.search('song-title.*?<em>(.*?)</em>', data, flags=re.DOTALL)
        track = songmatch.group(1)
        artistmatch = re.search('artist-title.*?<em>(.*?)</em>', data, flags=re.DOTALL)
        name = artistmatch.group(1)
        urlmatch = re.search("down-lrc-btn.*?':'(.*?)'", data, flags=re.DOTALL)
        found_url = urlmatch.group(1)
        # Accept the hit only when artist AND title are a close (>80%)
        # fuzzy match to what was requested.
        if (difflib.SequenceMatcher(None, lyrics.artist.lower(), name.lower()).ratio() > 0.8 ) and (difflib.SequenceMatcher(None, lyrics.title.lower(), track.lower()).ratio() > 0.8):
            lyr = urllib.urlopen(self.LRC_URL % found_url).read()
        else:
            return False
    except:
        # Any scrape/network failure is treated as "no lyrics found".
        return False
    # Guess the encoding of the raw LRC bytes and decode leniently.
    enc = chardet.detect(lyr)
    lyr = lyr.decode(enc['encoding'], 'ignore')
    lyrics.lyrics = lyr
    return True
def get_lyrics(self, lyrics):
    """Search lyrdb.com karaoke for synchronized lyrics.

    Fills lyrics.lyrics (and lyrics.list when several candidates match)
    and returns True on success, False otherwise.
    """
    utilities.log(debug, "%s: searching lyrics for %s - %s - %s" % (__title__, lyrics.artist, lyrics.album, lyrics.title))
    try:
        url = 'http://www.lyrdb.com/karaoke/?q=%s+%s&action=search' % (lyrics.artist.replace(' ', '+').lower(), lyrics.title.replace(' ', '+').lower())
        f = urllib.urlopen(url)
        Page = f.read()
    except:
        utilities.log(True, "%s: %s::%s (%d) [%s]" % (__title__, self.__class__.__name__, sys.exc_info()[2].tb_frame.f_code.co_name, sys.exc_info()[2].tb_lineno, sys.exc_info()[1]))
        return False
    links_query = re.compile('<tr><td class="tresults"><a href="/karaoke/([0-9]+).htm">(.*?)</td><td class="tresults">(.*?)</td>')
    urls = re.findall(links_query, Page)
    links = []
    for x in urls:
        # BUGFIX: the original compared against an undefined name 'song';
        # the request data lives on the 'lyrics' parameter.
        if (difflib.SequenceMatcher(None, lyrics.artist.lower(), x[2].lower()).ratio() > 0.8) and (difflib.SequenceMatcher(None, lyrics.title.lower(), x[1].lower()).ratio() > 0.8):
            links.append((x[2] + ' - ' + x[1], x[0], x[2], x[1]))
    if len(links) == 0:
        return False
    elif len(links) > 1:
        # Expose all candidates so the caller can offer a choice.
        lyrics.list = links
    lyr = self.get_lyrics_from_list(links[0])
    if not lyr:
        return False
    lyrics.lyrics = lyr
    return True
def get_lyrics(self, lyrics):
    # Read lyrics embedded in the audio file's own tags (no network).
    # mp3 files are tried for a Lyrics3 block first, then ID3; flac and
    # m4a use their native tag readers.
    utilities.log( debug, "%s: searching lyrics for %s - %s - %s" % (__title__, lyrics.artist, lyrics.album, lyrics.title))
    filename = lyrics.filename.decode("utf-8")
    ext = os.path.splitext(filename)[1].lower()
    lry = None
    try:
        if ext == '.mp3':
            lry = getLyrics3(filename)
    except:
        # Missing/invalid Lyrics3 data falls through to the other readers.
        pass
    if lry:
        # Lyrics3 payloads have unknown encoding; sniff it with chardet.
        enc = chardet.detect(lry)
        lyrics.lyrics = lry.decode(enc['encoding'])
    else:
        if ext == '.mp3':
            lry = getID3Lyrics(filename)
        elif ext == '.flac':
            lry = getFlacLyrics(filename)
        elif ext == '.m4a':
            lry = getMP4Lyrics(filename)
        if not lry:
            return False
        lyrics.lyrics = lry
    return True
def get_lyrics(self, lyrics):
    """Extract lyrics embedded in the track's own tags (no network)."""
    utilities.log(debug, "%s: searching lyrics for %s - %s - %s" % (__title__, lyrics.artist, lyrics.album, lyrics.title))
    filename = lyrics.filename.decode("utf-8")
    ext = os.path.splitext(filename)[1].lower()
    embedded = None
    if ext == '.mp3':
        # A Lyrics3 block, when present, takes precedence over ID3 tags.
        try:
            embedded = getLyrics3(filename)
        except:
            pass
    if embedded:
        # Lyrics3 payload encoding is unknown; sniff before decoding.
        detected = chardet.detect(embedded)
        lyrics.lyrics = embedded.decode(detected['encoding'])
        return True
    # Fall back to the format-specific tag readers.
    readers = {'.mp3': getID3Lyrics, '.flac': getFlacLyrics, '.m4a': getMP4Lyrics}
    reader = readers.get(ext)
    embedded = reader(filename) if reader else None
    if not embedded:
        return False
    lyrics.lyrics = embedded
    return True
def get_lyrics(self, lyrics):
    # Query the ALSong SOAP service for this track's lyrics; accept the
    # reply only when artist and title both fuzzy-match the request.
    utilities.log(
        debug, "%s: searching lyrics for %s - %s - %s" %
        (__title__, lyrics.artist, lyrics.album, lyrics.title))
    try:
        headers = {'Content-Type': 'text/xml; charset=utf-8'}
        request = urllib2.Request(
            ALSONG_URL, ALSONG_TMPL % (lyrics.title, lyrics.artist), headers)
        response = urllib2.urlopen(request)
        Page = response.read()
    except:
        # Any network/HTTP failure is treated as "no lyrics".
        return False
    tree = xml.parseString(Page)
    try:
        name = tree.getElementsByTagName(
            'strArtistName')[0].childNodes[0].data
        track = tree.getElementsByTagName('strTitle')[0].childNodes[0].data
    except:
        # Reply did not contain the expected elements.
        return False
    # >80% similarity on both fields is required before trusting the hit.
    if (difflib.SequenceMatcher(
            None, lyrics.artist.lower(), name.lower()).ratio() > 0.8) and (difflib.SequenceMatcher(
                None, lyrics.title.lower(), track.lower()).ratio() > 0.8):
        lyr = tree.getElementsByTagName(
            'strLyric')[0].childNodes[0].data.replace('<br>', '\n')
        lyrics.lyrics = lyr.encode('utf-8')
        return True
    return False
def buildLyrics(lyrics):
    """Emit the found lyrics as the grabber's XML document and exit.

    Each lyric line is filtered down to the XML 1.0 valid character
    ranges so etree never rejects control or invalid code points.
    """
    from lxml import etree
    xml = etree.XML(u'<lyrics></lyrics>')
    etree.SubElement(xml, "artist").text = lyrics.artist
    etree.SubElement(xml, "album").text = lyrics.album
    etree.SubElement(xml, "title").text = lyrics.title
    etree.SubElement(
        xml, "syncronized").text = 'True' if __syncronized__ else 'False'
    etree.SubElement(xml, "grabber").text = lyrics.source
    for line in lyrics.lyrics.splitlines():
        # BUGFIX: \u consumes exactly 4 hex digits, so the original
        # u'\u10000-\u10FFFF' did not describe the supplementary planes
        # (it parsed as \u1000, '0'-\u10FF, 'F', 'F'); \U escapes express
        # the intended U+10000..U+10FFFF range.
        line2 = re.sub(
            u'[^\u0020-\uD7FF\u0009\u000A\u000D\uE000-\uFFFD\U00010000-\U0010FFFF]+',
            '', line)
        etree.SubElement(xml, "lyric").text = line2
    utilities.log(
        True,
        utilities.convert_etree(
            etree.tostring(xml, encoding='UTF-8', pretty_print=True,
                           xml_declaration=True)))
    sys.exit(0)
def get_lyrics(self, lyrics):
    # Resolve the track to a numeric lrcid via the provider's XML API,
    # then download the LRC file derived from that id.
    utilities.log(
        debug, "%s: searching lyrics for %s - %s - %s" %
        (__title__, lyrics.artist, lyrics.album, lyrics.title))
    try:
        url = self.BASE_URL % (urllib.quote(
            lyrics.title), urllib.quote((lyrics.artist)))
        xml_str = urllib.urlopen(url).read()
        lrcid_pattern = re.compile(r'<lrcid>(.+?)</lrcid>')
        lrcid = int(re.search(lrcid_pattern, xml_str).group(1))
        # id 0 is the provider's "not found" marker
        if lrcid == 0:
            return False
        # the LRC path is sharded by lrcid / 100
        lrc_url = self.LRC_URL % (lrcid / 100, lrcid)
        lyr = urllib.urlopen(lrc_url).read()
    except:
        utilities.log(
            True, "%s: %s::%s (%d) [%s]" %
            (__title__, self.__class__.__name__,
             sys.exc_info()[2].tb_frame.f_code.co_name,
             sys.exc_info()[2].tb_lineno, sys.exc_info()[1]))
        return False
    # Guess the LRC file's encoding and decode leniently.
    enc = chardet.detect(lyr)
    lyr = lyr.decode(enc['encoding'], 'ignore')
    lyrics.lyrics = lyr
    return True
def get_lyrics(self, lyrics):
    """Look up the track's lrcid via the provider's XML API, then fetch
    the LRC file it points at."""
    utilities.log(debug, "%s: searching lyrics for %s - %s - %s" % (__title__, lyrics.artist, lyrics.album, lyrics.title))
    try:
        search_url = self.BASE_URL % (urllib.quote(lyrics.title), urllib.quote((lyrics.artist)))
        reply = urllib.urlopen(search_url).read()
        song_id = int(re.search(r'<lrcid>(.+?)</lrcid>', reply).group(1))
        # An id of zero is the provider's "no match" marker.
        if song_id == 0:
            return False
        # LRC files are sharded into directories by id / 100.
        raw = urllib.urlopen(self.LRC_URL % (song_id / 100, song_id)).read()
    except:
        utilities.log(True, "%s: %s::%s (%d) [%s]" % (__title__, self.__class__.__name__, sys.exc_info()[2].tb_frame.f_code.co_name, sys.exc_info()[2].tb_lineno, sys.exc_info()[1]))
        return False
    # Sniff the payload's encoding and decode leniently.
    detected = chardet.detect(raw)
    lyrics.lyrics = raw.decode(detected['encoding'], 'ignore')
    return True
def get_lyrics(self, lyrics): utilities.log( debug, "%s: searching lyrics for %s - %s - %s" % (__title__, lyrics.artist, lyrics.album, lyrics.title)) # replace ampersands and the like for exp in LYRIC_ARTIST_REPLACE: p = re.compile(exp[0]) artist = p.sub(exp[1], lyrics.artist) for exp in LYRIC_TITLE_REPLACE: p = re.compile(exp[0]) title = p.sub(exp[1], lyrics.title) # strip things like "(live at Somewhere)", "(accoustic)", etc for exp in LYRIC_TITLE_STRIP: p = re.compile(exp) title = p.sub('', title) # compress spaces title = title.strip().replace('`', '').replace('/', '') artist = artist.strip().replace('`', '').replace('/', '') try: url = self.LIST_URL % ( ttpClient.EncodeArtTit(artist.replace(' ', '').lower()), ttpClient.EncodeArtTit(title.replace(' ', '').lower())) f = urlopen(url) Page = f.read().decode('utf-8') except: utilities.log( True, "%s: %s::%s (%d) [%s]" % (__title__, self.__class__.__name__, sys.exc_info()[2].tb_frame.f_code.co_name, sys.exc_info()[2].tb_lineno, sys.exc_info()[1])) return False links_query = re.compile( '<lrc id=\"(.*?)\" artist=\"(.*?)\" title=\"(.*?)\"></lrc>') urls = re.findall(links_query, Page) links = [] for x in urls: if (difflib.SequenceMatcher(None, artist.lower(), x[1].lower()).ratio() > 0.8) and (difflib.SequenceMatcher( None, title.lower(), x[2].lower()).ratio() > 0.8): links.append((x[1] + ' - ' + x[2], x[0], x[1], x[2])) if len(links) == 0: return False elif len(links) > 1: lyrics.list = links for link in links: lyr = self.get_lyrics_from_list(link) if lyr and lyr.startswith(b'['): enc = chardet.detect(lyr) lyr = lyr.decode(enc['encoding'], 'ignore') lyrics.lyrics = lyr return True return False
def main():
    # Command-line entry point: parse options, populate the Lyrics
    # request object and run the selected action (version/test/search).
    global debug
    parser = OptionParser()
    parser.add_option('-v', "--version", action="store_true", default=False,
                      dest="version", help="Display version and author")
    parser.add_option('-t', "--test", action="store_true", default=False,
                      dest="test", help="Perform self-test for dependencies.")
    parser.add_option('-s', "--search", action="store_true", default=False,
                      dest="search", help="Search for lyrics.")
    parser.add_option('-a', "--artist", metavar="ARTIST", default=None,
                      dest="artist", help="Artist of track.")
    parser.add_option('-b', "--album", metavar="ALBUM", default=None,
                      dest="album", help="Album of track.")
    parser.add_option('-n', "--title", metavar="TITLE", default=None,
                      dest="title", help="Title of track.")
    parser.add_option('-f', "--filename", metavar="FILENAME", default=None,
                      dest="filename", help="Filename of track.")
    parser.add_option('-d', '--debug', action="store_true", default=False,
                      dest="debug", help=("Show debug messages"))
    opts, args = parser.parse_args()
    lyrics = utilities.Lyrics()
    lyrics.source = __title__
    lyrics.syncronized = __syncronized__
    if opts.debug:
        debug = True
    # buildVersion()/performSelfTest() exit the process themselves.
    if opts.version:
        buildVersion()
    if opts.test:
        performSelfTest()
    if opts.artist:
        lyrics.artist = opts.artist
    if opts.album:
        lyrics.album = opts.album
    if opts.title:
        lyrics.title = opts.title
    if opts.filename:
        lyrics.filename = opts.filename
    # Positional arguments are not accepted.
    if (len(args) > 0):
        utilities.log('ERROR: invalid arguments found')
        sys.exit(1)
    fetcher = LyricsFetcher()
    if fetcher.get_lyrics(lyrics):
        # buildLyrics() prints the result XML and exits.
        buildLyrics(lyrics)
        sys.exit(0)
    else:
        utilities.log(True, "No lyrics found for this track")
        sys.exit(1)
def get_lyrics(self, lyrics):
    # Two-step Genius lookup: hit the search API for the song page URL,
    # then scrape the lyrics <div> out of that page.
    utilities.log(
        debug, "%s: searching lyrics for %s - %s - %s" %
        (__title__, lyrics.artist, lyrics.album, lyrics.title))
    try:
        request = Request(
            self.url % (quote(lyrics.artist), '%20', quote(lyrics.title)))
        # spoof a browser UA; presumably the service rejects the default
        # one — TODO confirm
        request.add_header(
            'User-Agent',
            'Mozilla/5.0 (Windows NT 6.1; rv:25.0) Gecko/20100101 Firefox/25.0'
        )
        req = urlopen(request)
        response = req.read().decode('utf-8')
    except:
        return False
    req.close()
    data = simplejson.loads(response)
    try:
        # the first search hit is taken as the match
        self.page = data['response']['hits'][0]['result']['url']
    except:
        return False
    utilities.log(debug, "%s: search url: %s" % (__title__, self.page))
    try:
        request = Request(self.page)
        request.add_header(
            'User-Agent',
            'Mozilla/5.0 (Windows NT 6.1; rv:25.0) Gecko/20100101 Firefox/25.0'
        )
        req = urlopen(request)
        response = req.read().decode('utf-8')
    except:
        return False
    req.close()
    matchcode = re.search(u'<div class="lyrics">(.*?)</div>', response,
                          flags=re.DOTALL)
    try:
        lyricscode = (matchcode.group(1))
        # unescape entities, turn <br /> into newlines, then drop the
        # remaining tags and [Verse]/[Chorus]-style markers, and squeeze
        # runs of blank lines
        htmlparser = html_parser.HTMLParser()
        lyricstext = htmlparser.unescape(lyricscode).replace(
            u'<br />', u'\n')
        templyr = re.sub(u'<[^<]+?>', '', lyricstext)
        lyr = re.sub(u'\[(.*?)\]', '', templyr)
        lyrics.lyrics = lyr.strip().replace(u'\n\n\n', u'\n\n')
        return True
    except:
        return False
def main():
    # Command-line entry point: parse options, populate the Lyrics
    # request object and run the selected action (version/test/search).
    global debug
    parser = OptionParser()
    parser.add_option('-v', "--version", action="store_true", default=False,
                      dest="version", help="Display version and author")
    parser.add_option('-t', "--test", action="store_true", default=False,
                      dest="test", help="Test grabber with a know good search")
    parser.add_option('-s', "--search", action="store_true", default=False,
                      dest="search", help="Search for lyrics.")
    parser.add_option('-a', "--artist", metavar="ARTIST", default=None,
                      dest="artist", help="Artist of track.")
    parser.add_option('-b', "--album", metavar="ALBUM", default=None,
                      dest="album", help="Album of track.")
    parser.add_option('-n', "--title", metavar="TITLE", default=None,
                      dest="title", help="Title of track.")
    parser.add_option('-f', "--filename", metavar="FILENAME", default=None,
                      dest="filename", help="Filename of track.")
    parser.add_option('-d', '--debug', action="store_true", default=False,
                      dest="debug", help=("Show debug messages"))
    opts, args = parser.parse_args()
    lyrics = utilities.Lyrics()
    lyrics.source = __title__
    lyrics.syncronized = __syncronized__
    if opts.debug:
        debug = True
    # buildVersion()/performSelfTest() exit the process themselves.
    if opts.version:
        buildVersion()
    if opts.test:
        performSelfTest()
    if opts.artist:
        lyrics.artist = opts.artist
    if opts.album:
        lyrics.album = opts.album
    if opts.title:
        lyrics.title = opts.title
    if opts.filename:
        lyrics.filename = opts.filename
    fetcher = LyricsFetcher()
    if fetcher.get_lyrics(lyrics):
        # buildLyrics() prints the result XML and exits.
        buildLyrics(lyrics)
        sys.exit(0)
    else:
        utilities.log(True, "No lyrics found for this track")
        sys.exit(1)
def get_lyrics(self, lyrics):
    """Scrape letssingit.com: search, pick a close match, fetch the song
    page and collect the lyrics_part sections from it."""
    utilities.log(
        debug, '%s: searching lyrics for %s - %s' %
        (__title__, lyrics.artist, lyrics.title))
    query = '%s+%s' % (urllib.quote_plus(
        lyrics.artist), urllib.quote_plus(lyrics.title))
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:25.0) Gecko/20100101 Firefox/25.0',
            'Referer': 'https://www.letssingit.com/'
        }
        request = urllib2.Request(self.url % query, None, headers)
        req = urllib2.urlopen(request)
        response = req.read()
        utilities.log(False, response)
    except:
        return False
    req.close()
    matchcode = re.search('</TD><TD><A href="(.*?)"', response)
    if matchcode:
        lyricscode = (matchcode.group(1))
        # BUGFIX: str.lstrip() strips a *character set*, not a prefix, so
        # the original could also eat leading characters of the song slug;
        # remove the site prefix explicitly instead.
        prefix = 'http://www.letssingit.com/'
        clean = lyricscode[len(prefix):] if lyricscode.startswith(prefix) else lyricscode
        clean = clean.rsplit('-', 1)[0]
        result = clean.replace('-lyrics-', ' ')
        # Only follow the link when the slug fuzzy-matches the query.
        if (difflib.SequenceMatcher(None, query.lower().replace('+', ''),
                                    result.lower().replace('-', '')).ratio() > 0.8):
            try:
                request = urllib2.Request(lyricscode)
                request.add_header(
                    'User-Agent',
                    'Mozilla/5.0 (Windows NT 6.1; rv:25.0) Gecko/20100101 Firefox/25.0'
                )
                req = urllib2.urlopen(request)
                resp = req.read()
            except:
                return False
            req.close()
            # remove addslots
            resp = re.sub(r'<div id=adslot.*?</div>', '', resp)
            # find all class=lyrics_part_name and class=lyrics_part_text parts
            match = re.findall('<P class=lyrics_part_.*?>(.*?)</P>', resp,
                               flags=re.DOTALL)
            if len(match):
                for line in match:
                    lyrics.lyrics += line.replace('<br>', '') + '\n'
            else:
                return False
            return True
def get_lyrics(self, lyrics):
    """Query the viewlyrics/MiniLyrics search service and download the
    best-matching synchronized lyrics."""
    utilities.log(
        debug, "%s: searching lyrics for %s - %s - %s" %
        (__title__, lyrics.artist, lyrics.album, lyrics.title))
    # Build the signed MiniLyrics search request (MD5 of body + secret).
    xml = "<?xml version=\"1.0\" encoding='utf-8'?>\r\n"
    xml += "<search filetype=\"lyrics\" artist=\"%s\" title=\"%s\" " % (
        lyrics.artist, lyrics.title)
    xml += "ClientCharEncoding=\"utf-8\"/>\r\n"
    md5hash = md5(xml + "Mlv1clt4.0").digest()
    request = "\x02\x00\x04\x00\x00\x00%s%s" % (md5hash, xml)
    del md5hash, xml
    url = "http://www.viewlyrics.com:1212/searchlyrics.htm"
    #url = "http://search.crintsoft.com/searchlyrics.htm"
    req = urllib2.Request(url, request)
    req.add_header("User-Agent", "MiniLyrics")
    if self.proxy:
        opener = urllib2.build_opener(urllib2.ProxyHandler(self.proxy))
    else:
        opener = urllib2.build_opener()
    try:
        response = opener.open(req).read()
    except:
        utilities.log(
            True, "%s: %s::%s (%d) [%s]" %
            (__title__, self.__class__.__name__,
             sys.exc_info()[2].tb_frame.f_code.co_name,
             sys.exc_info()[2].tb_lineno, sys.exc_info()[1]))
        return False
    # BUGFIX: a stray 'print response' wrote the raw reply to stdout,
    # which corrupts the grabber's XML output protocol; log it at debug
    # level instead.
    utilities.log(debug, response)
    lrcList = self.miniLyricsParser(response)
    links = []
    for x in lrcList:
        # BUGFIX: the original referenced an undefined name 'song'; the
        # request data lives on the 'lyrics' parameter.
        if (difflib.SequenceMatcher(None, lyrics.artist.lower(),
                                    x[0].lower()).ratio() > 0.8) and (difflib.SequenceMatcher(
                                        None, lyrics.title.lower(), x[1].lower()).ratio() > 0.8):
            links.append((x[0] + ' - ' + x[1], x[2], x[0], x[1]))
    if len(links) == 0:
        return False
    elif len(links) > 1:
        lyrics.list = links
    lyr = self.get_lyrics_from_list(links[0])
    if not lyr:
        return False
    enc = chardet.detect(lyr)
    lyr = lyr.decode(enc['encoding'], 'ignore')
    lyrics.lyrics = lyr
    return True
def get_lyrics(self, lyrics): utilities.log(debug, "%s: searching lyrics for %s - %s - %s" % (__title__, lyrics.artist, lyrics.album, lyrics.title)) # replace ampersands and the like for exp in LYRIC_ARTIST_REPLACE: p = re.compile(exp[0]) artist = p.sub(exp[1], lyrics.artist) for exp in LYRIC_TITLE_REPLACE: p = re.compile(exp[0]) title = p.sub(exp[1], lyrics.title) # strip things like "(live at Somewhere)", "(accoustic)", etc for exp in LYRIC_TITLE_STRIP: p = re.compile(exp) title = p.sub('', title) # compress spaces title = title.strip().replace('`','').replace('/','') artist = artist.strip().replace('`','').replace('/','') try: url = self.LIST_URL %(ttpClient.EncodeArtTit(artist.replace(' ','').lower()), ttpClient.EncodeArtTit(title.replace(' ','').lower())) f = urllib.urlopen(url) Page = f.read() except: utilities.log(True, "%s: %s::%s (%d) [%s]" % ( __title__, self.__class__.__name__, sys.exc_info()[ 2 ].tb_frame.f_code.co_name, sys.exc_info()[ 2 ].tb_lineno, sys.exc_info()[ 1 ] )) return False links_query = re.compile('<lrc id=\"(.*?)\" artist=\"(.*?)\" title=\"(.*?)\"></lrc>') urls = re.findall(links_query, Page) links = [] for x in urls: if (difflib.SequenceMatcher(None, artist.lower(), x[1].lower()).ratio() > 0.8) and (difflib.SequenceMatcher(None, title.lower(), x[2].lower()).ratio() > 0.8): links.append( ( x[1] + ' - ' + x[2], x[0], x[1], x[2] ) ) if len(links) == 0: return False elif len(links) > 1: lyrics.list = links for link in links: lyr = self.get_lyrics_from_list(link) if lyr and lyr.startswith('['): enc = chardet.detect(lyr) lyr = lyr.decode(enc['encoding'], 'ignore') lyrics.lyrics = lyr return True return False
def buildVersion():
    """Print this grabber's capability record as XML and exit."""
    from lxml import etree
    root = etree.XML(u'<grabber></grabber>')
    # Tag/value pairs emitted in the order the consumer expects.
    fields = (
        ("name", __title__),
        ("author", __author__),
        ("command", 'ttplayer.py'),
        ("type", 'lyrics'),
        ("description", __description__),
        ("version", __version__),
        ("priority", __priority__),
        ("syncronized", 'True' if __syncronized__ else 'False'),
    )
    for tag, value in fields:
        etree.SubElement(root, tag).text = value
    utilities.log(True, etree.tostring(root, encoding='UTF-8',
                                       pretty_print=True,
                                       xml_declaration=True))
    sys.exit(0)
def get_lyrics_from_list(self, link):
    # Download one candidate from the result list; returns the decoded
    # lyrics text, or None on failure.
    title, url, artist, song = link
    try:
        f = urllib.urlopen(url)
        lyrics = f.read()
    except:
        utilities.log(
            True, "%s: %s::%s (%d) [%s]" %
            (__title__, self.__class__.__name__,
             sys.exc_info()[2].tb_frame.f_code.co_name,
             sys.exc_info()[2].tb_lineno, sys.exc_info()[1]))
        return None
    # Sniff the payload's encoding and decode leniently.
    enc = chardet.detect(lyrics)
    lyrics = lyrics.decode(enc['encoding'], 'ignore')
    return lyrics
def get_lyrics_from_list(self, link):
    # Download the LRC file for one lyrdb search hit; returns the raw
    # page body, or None on failure.
    title, Id, artist, song = link
    utilities.log(debug, '%s %s %s' % (Id, artist, song))
    try:
        url = 'http://www.lyrdb.com/karaoke/downloadlrc.php?q=%s' % (Id)
        f = urllib.urlopen(url)
        Page = f.read()
    except:
        utilities.log(
            True, "%s: %s::%s (%d) [%s]" %
            (__title__, self.__class__.__name__,
             sys.exc_info()[2].tb_frame.f_code.co_name,
             sys.exc_info()[2].tb_lineno, sys.exc_info()[1]))
        return None
    return Page
def get_lyrics_from_list(self, link):
    """Fetch one candidate URL and return its decoded payload, or None."""
    title, url, artist, song = link
    try:
        payload = urllib.urlopen(url).read()
    except:
        utilities.log(True, "%s: %s::%s (%d) [%s]" % (__title__, self.__class__.__name__, sys.exc_info()[2].tb_frame.f_code.co_name, sys.exc_info()[2].tb_lineno, sys.exc_info()[1]))
        return None
    # Sniff the encoding of the raw bytes and decode leniently.
    guess = chardet.detect(payload)
    return payload.decode(guess['encoding'], 'ignore')
def buildLyrics(lyrics):
    """Print the found lyrics as the grabber XML document and exit."""
    from lxml import etree
    doc = etree.XML(u'<lyrics></lyrics>')
    header = (
        ("artist", lyrics.artist),
        ("album", lyrics.album),
        ("title", lyrics.title),
        ("syncronized", 'True' if __syncronized__ else 'False'),
        ("grabber", lyrics.source),
    )
    for tag, value in header:
        etree.SubElement(doc, tag).text = value
    # One <lyric> element per line of text.
    for line in lyrics.lyrics.splitlines():
        etree.SubElement(doc, "lyric").text = line
    utilities.log(True, etree.tostring(doc, encoding='UTF-8',
                                       pretty_print=True,
                                       xml_declaration=True))
    sys.exit(0)
def get_lyrics_from_list(self, link):
    """Download the LRC file for one lyrdb search hit; None on failure."""
    title, Id, artist, song = link
    utilities.log(debug, '%s %s %s' % (Id, artist, song))
    download_url = 'http://www.lyrdb.com/karaoke/downloadlrc.php?q=%s' % (Id)
    try:
        handle = urllib.urlopen(download_url)
        body = handle.read()
    except:
        utilities.log(True, "%s: %s::%s (%d) [%s]" % (__title__, self.__class__.__name__, sys.exc_info()[2].tb_frame.f_code.co_name, sys.exc_info()[2].tb_lineno, sys.exc_info()[1]))
        return None
    return body
def get_lyrics(self, lyrics):
    # GOM lyrics service: the lookup key is derived from the audio file
    # itself, with artist/title (euc-kr encoded) refining the search.
    utilities.log(
        debug, "%s: searching lyrics for %s - %s - %s" %
        (__title__, lyrics.artist, lyrics.album, lyrics.title))
    key = None
    try:
        ext = os.path.splitext(lyrics.filename.decode("utf-8"))[1].lower()
        sup_ext = ['.mp3', '.ogg', '.wma', '.flac', '.ape', '.wav']
        if ext in sup_ext:
            key = gomClient.GetKeyFromFile(lyrics.filename)
        if not key:
            return False
        url = GOM_URL % (
            key,
            urllib.quote(
                remove_accents(
                    lyrics.title.decode('utf-8')).encode('euc-kr')),
            (remove_accents(
                lyrics.artist.decode('utf-8')).encode('euc-kr')))
        response = urllib.urlopen(url)
        Page = response.read()
    except:
        utilities.log(
            True, "%s: %s::%s (%d) [%s]" %
            (__title__, self.__class__.__name__,
             sys.exc_info()[2].tb_frame.f_code.co_name,
             sys.exc_info()[2].tb_lineno, sys.exc_info()[1]))
        return False
    # result="0" marks a successful reply from the service.
    if Page[:Page.find('>') + 1] != '<lyrics_reply result="0">':
        return False
    syncs = re.compile('<sync start="(\d+)">([^<]*)</sync>').findall(Page)
    lyrline = []
    lyrline.append("[ti:%s]" % lyrics.title)
    lyrline.append("[ar:%s]" % lyrics.artist)
    for sync in syncs:
        # timeformat conversion
        t = "%02d:%02d.%02d" % gomClient.mSecConv(int(sync[0]))
        # unescape string
        try:
            # NOTE(review): the entity strings below appear HTML-mangled in
            # the extracted source; assumed to be &#39; and &quot; — confirm
            # against the original file.
            s = unicode(sync[1], "euc-kr").encode("utf-8").replace(
                "&#39;", "'").replace("&quot;", '"')
            lyrline.append("[%s]%s" % (t, s))
        except:
            pass
    lyrics.lyrics = '\n'.join(lyrline)
    return True
def get_lyrics(self, lyrics):
    """Delegate to self.search() and store the first usable result.

    Returns True when lyrics were found and decoded, False otherwise.
    """
    utilities.log(debug, "%s: searching lyrics for %s - %s - %s" % (__title__, lyrics.artist, lyrics.album, lyrics.title))
    links = self.search(lyrics.artist, lyrics.title)
    # 'not links' covers both None (search failed) and an empty hit list;
    # the original's 'links == None' and stray semicolons were un-idiomatic.
    if not links:
        return False
    elif len(links) > 1:
        # Expose all candidates so the caller can offer a choice.
        lyrics.list = links
    lyr = self.get_lyrics_from_list(links[0])
    if not lyr:
        return False
    # Sniff the payload's encoding and decode leniently.
    enc = chardet.detect(lyr)
    lyrics.lyrics = lyr.decode(enc['encoding'], 'ignore')
    return True
def get_lyrics(self, lyrics):
    """Query the viewlyrics/MiniLyrics search service and download the
    best-matching synchronized lyrics."""
    utilities.log(debug, "%s: searching lyrics for %s - %s - %s" % (__title__, lyrics.artist, lyrics.album, lyrics.title))
    # Build the signed MiniLyrics search request (MD5 of body + secret).
    xml = "<?xml version=\"1.0\" encoding='utf-8'?>\r\n"
    xml += "<search filetype=\"lyrics\" artist=\"%s\" title=\"%s\" " % (lyrics.artist, lyrics.title)
    xml += "ClientCharEncoding=\"utf-8\"/>\r\n"
    md5hash = md5(xml + "Mlv1clt4.0").digest()
    request = "\x02\x00\x04\x00\x00\x00%s%s" % (md5hash, xml)
    del md5hash, xml
    url = "http://www.viewlyrics.com:1212/searchlyrics.htm"
    #url = "http://search.crintsoft.com/searchlyrics.htm"
    req = urllib2.Request(url, request)
    req.add_header("User-Agent", "MiniLyrics")
    if self.proxy:
        opener = urllib2.build_opener(urllib2.ProxyHandler(self.proxy))
    else:
        opener = urllib2.build_opener()
    try:
        response = opener.open(req).read()
    except:
        utilities.log(True, "%s: %s::%s (%d) [%s]" % (__title__, self.__class__.__name__, sys.exc_info()[2].tb_frame.f_code.co_name, sys.exc_info()[2].tb_lineno, sys.exc_info()[1]))
        return False
    # BUGFIX: a stray 'print response' wrote the raw reply to stdout,
    # corrupting the grabber's XML output protocol; log at debug instead.
    utilities.log(debug, response)
    lrcList = self.miniLyricsParser(response)
    links = []
    for x in lrcList:
        # BUGFIX: the original referenced an undefined name 'song'; the
        # request data lives on the 'lyrics' parameter.
        if (difflib.SequenceMatcher(None, lyrics.artist.lower(), x[0].lower()).ratio() > 0.8) and (difflib.SequenceMatcher(None, lyrics.title.lower(), x[1].lower()).ratio() > 0.8):
            links.append((x[0] + ' - ' + x[1], x[2], x[0], x[1]))
    if len(links) == 0:
        return False
    elif len(links) > 1:
        lyrics.list = links
    lyr = self.get_lyrics_from_list(links[0])
    if not lyr:
        return False
    enc = chardet.detect(lyr)
    lyr = lyr.decode(enc['encoding'], 'ignore')
    lyrics.lyrics = lyr
    return True
def get_lyrics(self, lyrics):
    # Two-stage crawl of lyrics.com: find the artist page, then a song
    # link whose text fuzzy-matches the title, then scrape the <pre>.
    utilities.log(debug, "%s: searching lyrics for %s - %s - %s" % (__title__, lyrics.artist, lyrics.album, lyrics.title))
    try:
        from bs4 import BeautifulSoup
    except:
        utilities.log(True, "Failed to import BeautifulSoup. This grabber requires python-bs4")
        return False
    try:
        request = urlopen(self.url % quote_plus(lyrics.artist))
        response = request.read()
    except:
        return False
    request.close()
    soup = BeautifulSoup(response, 'html.parser')
    url = ''
    for link in soup.find_all('a'):
        # first anchor pointing at an artist page wins
        if link.string and link.get('href').startswith('artist/'):
            url = 'http://www.lyrics.com/' + link.get('href')
            break
    if url:
        utilities.log(debug, "%s: Artist url is %s" % (__title__, url))
        try:
            req = urlopen(url)
            resp = req.read().decode('utf-8')
        except:
            return False
        req.close()
        soup = BeautifulSoup(resp, 'html.parser')
        url = ''
        for link in soup.find_all('a'):
            # pick the song link whose text fuzzy-matches (>80%) the title
            if link.string and link.get('href').startswith('/lyric/') and (difflib.SequenceMatcher(None, link.string.lower(), lyrics.title.lower()).ratio() > 0.8):
                url = 'http://www.lyrics.com' + link.get('href')
                break
        if url:
            utilities.log(debug, "%s: Song url is %s" % (__title__, url))
            try:
                req2 = urlopen(url)
                resp2 = req2.read().decode('utf-8')
            except:
                return False
            req2.close()
            matchcode = re.search(u'<pre.*?>(.*?)</pre>', resp2, flags=re.DOTALL)
            if matchcode:
                lyricscode = (matchcode.group(1))
                # strip tags; the page escapes newlines as literal '\n'
                lyr = re.sub(u'<[^<]+?>', '', lyricscode)
                lyrics.lyrics = lyr.replace('\\n','\n')
                return True
    return False
def get_lyrics(self, lyrics):
    # Two-stage crawl of lyrics.com: find the artist page, then a song
    # link whose text fuzzy-matches the title, then scrape the <pre>.
    utilities.log(debug, "%s: searching lyrics for %s - %s - %s" % (__title__, lyrics.artist, lyrics.album, lyrics.title))
    try:
        from bs4 import BeautifulSoup
    except:
        utilities.log(True, "Failed to import BeautifulSoup. This grabber requires python-bs4")
        return False
    try:
        request = urllib2.urlopen(self.url % urllib.quote_plus(lyrics.artist))
        response = request.read()
    except:
        return False
    request.close()
    soup = BeautifulSoup(response, 'html.parser')
    url = ''
    for link in soup.find_all('a'):
        # first anchor pointing at an artist page wins
        if link.string and link.get('href').startswith('artist/'):
            url = 'http://www.lyrics.com/' + link.get('href')
            break
    if url:
        utilities.log(debug, "%s: Artist url is %s" % (__title__, url))
        try:
            req = urllib2.urlopen(url)
            resp = req.read()
        except:
            return False
        req.close()
        soup = BeautifulSoup(resp, 'html.parser')
        url = ''
        for link in soup.find_all('a'):
            # pick the song link whose text fuzzy-matches (>80%) the title
            if link.string and link.get('href').startswith('/lyric/') and (difflib.SequenceMatcher(None, link.string.lower(), lyrics.title.lower()).ratio() > 0.8):
                url = 'http://www.lyrics.com' + link.get('href')
                break
        if url:
            utilities.log(debug, "%s: Song url is %s" % (__title__, url))
            try:
                req2 = urllib2.urlopen(url)
                resp2 = req2.read()
            except:
                return False
            req2.close()
            matchcode = re.search('<pre.*?>(.*?)</pre>', resp2, flags=re.DOTALL)
            if matchcode:
                lyricscode = (matchcode.group(1))
                # strip tags; the page escapes newlines as literal '\n'
                lyr = re.sub('<[^<]+?>', '', lyricscode)
                lyrics.lyrics = lyr.replace('\\n','\n')
                return True
    return False
def get_lyrics_from_list(self, link):
    # Download one TTPlayer candidate. The request is signed with
    # ttpClient.CodeFunc plus a random nonce. Returns the LRC text,
    # '' when the reply is not LRC data, or None on error.
    title,Id,artist,song = link
    utilities.log(debug, '%s %s %s' %(Id, artist, song))
    try:
        url = self.LYRIC_URL %(int(Id),ttpClient.CodeFunc(int(Id), artist + song), random.randint(0,0xFFFFFFFFFFFF))
        f = urllib.urlopen(url)
        Page = f.read()
    except:
        utilities.log(True, "%s: %s::%s (%d) [%s]" % ( __title__, self.__class__.__name__, sys.exc_info()[ 2 ].tb_frame.f_code.co_name, sys.exc_info()[ 2 ].tb_lineno, sys.exc_info()[ 1 ] ))
        return None
    # a valid LRC payload starts with a '[' time/meta tag
    if Page.startswith('['):
        return Page
    return ''
def performSelfTest():
    """Run a known-good search to verify the grabber works end to end."""
    lyrics = utilities.Lyrics()
    lyrics.source = __title__
    lyrics.syncronized = __syncronized__
    lyrics.artist = 'Dire Straits'
    lyrics.album = 'Brothers In Arms'
    lyrics.title = 'Money For Nothing'
    if LyricsFetcher().get_lyrics(lyrics):
        utilities.log(True, "Everything appears in order.")
        sys.exit(0)
    utilities.log(True, "The lyrics for the test search failed!")
    sys.exit(1)
def performSelfTest():
    """Run a known-good search to verify the grabber works end to end."""
    lyrics = utilities.Lyrics()
    lyrics.source = __title__
    lyrics.syncronized = __syncronized__
    lyrics.artist = 'Dagon'
    lyrics.album = 'Terraphobic'
    lyrics.title = 'Cut To The Heart'
    if LyricsFetcher().get_lyrics(lyrics):
        utilities.log(True, "Everything appears in order.")
        sys.exit(0)
    utilities.log(True, "The lyrics for the test search failed!")
    sys.exit(1)
def performSelfTest():
    """Run the grabber against the bundled example file with tag lyrics."""
    lyrics = utilities.Lyrics()
    lyrics.source = __title__
    lyrics.syncronized = __syncronized__
    lyrics.artist = 'Robb Benson'
    lyrics.album = 'Demo Tracks'
    lyrics.title = 'Lone Rock'
    lyrics.filename = os.path.dirname(os.path.abspath(__file__)) + '/examples/taglyrics.mp3'
    if LyricsFetcher().get_lyrics(lyrics):
        utilities.log(True, "Everything appears in order.")
        sys.exit(0)
    utilities.log(True, "The lyrics for the test search failed!")
    sys.exit(1)
def get_lyrics(self, lyrics):
    # Two-step Genius lookup: the search API yields the song page URL,
    # then the lyrics <div> is scraped from that page.
    utilities.log(debug, "%s: searching lyrics for %s - %s - %s" % (__title__, lyrics.artist, lyrics.album, lyrics.title))
    try:
        request = urllib2.Request(self.url % (urllib2.quote(lyrics.artist), '%20', urllib2.quote(lyrics.title)))
        # spoof a browser UA; presumably the service rejects the default
        # one — TODO confirm
        request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; rv:25.0) Gecko/20100101 Firefox/25.0')
        req = urllib2.urlopen(request)
        response = req.read()
    except:
        return False
    req.close()
    data = simplejson.loads(response)
    try:
        # the first search hit is taken as the match
        self.page = data['response']['hits'][0]['result']['url']
    except:
        return False
    utilities.log(debug, "%s: search url: %s" % (__title__, self.page))
    try:
        request = urllib2.Request(self.page)
        request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; rv:25.0) Gecko/20100101 Firefox/25.0')
        req = urllib2.urlopen(request)
        response = req.read()
    except:
        return False
    req.close()
    matchcode = re.search('<div class="lyrics">(.*?)</div>', response, flags=re.DOTALL)
    try:
        lyricscode = (matchcode.group(1))
        # unescape entities, convert <br /> to newlines, strip remaining
        # tags and [Verse]/[Chorus]-style markers, squeeze blank lines
        htmlparser = HTMLParser.HTMLParser()
        lyricstext = htmlparser.unescape(lyricscode).replace('<br />', '\n')
        templyr = re.sub('<[^<]+?>', '', lyricstext)
        lyr = re.sub('\[(.*?)\]', '', templyr)
        lyrics.lyrics = lyr.strip().replace('\n\n\n', '\n\n')
        return True
    except:
        return False
def getLyricsGeneric(filename):
    """Return lyrics embedded in any taglib-readable file, or None."""
    try:
        import taglib
    except:
        utilities.log(True, "Failed to import taglib. This grabber requires "
                      "pytaglib TagLib bindings for Python. "
                      "https://github.com/supermihi/pytaglib")
        return None
    try:
        utilities.log(debug, "%s: trying to open %s" % (__title__, filename))
        tagfile = taglib.File(filename)
        # Return the first tag whose name marks it as a lyrics field.
        for name in tagfile.tags:
            if name.startswith('LYRICS'):
                return tagfile.tags[name][0]
        return None
    except:
        return None
def performSelfTest():
    """Verify the taglib dependency, then run a known-good tag lookup."""
    try:
        import taglib
    except:
        utilities.log(
            True, "Failed to import taglib. This grabber requires "
            "pytaglib ? TagLib bindings for Python. "
            "https://github.com/supermihi/pytaglib")
        sys.exit(1)
    lyrics = utilities.Lyrics()
    lyrics.source = __title__
    lyrics.syncronized = __syncronized__
    lyrics.artist = 'Robb Benson'
    lyrics.album = 'Demo Tracks'
    lyrics.title = 'Lone Rock'
    lyrics.filename = os.path.dirname(
        os.path.abspath(__file__)) + '/examples/taglyrics.mp3'
    if LyricsFetcher().get_lyrics(lyrics):
        utilities.log(True, "Everything appears in order.")
        sys.exit(0)
    utilities.log(True, "The lyrics for the test search failed!")
    sys.exit(1)
def performSelfTest():
    """Check the bs4 dependency, then run a known-good search."""
    try:
        from bs4 import BeautifulSoup
    except:
        utilities.log(
            True,
            "Failed to import BeautifulSoup. This grabber requires python-bs4")
        sys.exit(1)
    lyrics = utilities.Lyrics()
    lyrics.source = __title__
    lyrics.syncronized = __syncronized__
    lyrics.artist = 'Dire Straits'
    lyrics.album = 'Brothers In Arms'
    lyrics.title = 'Money For Nothing'
    if LyricsFetcher().get_lyrics(lyrics):
        utilities.log(True, "Everything appears in order.")
        # Also emit the result XML (buildLyrics exits the process).
        buildLyrics(lyrics)
        sys.exit(0)
    utilities.log(True, "The lyrics for the test search failed!")
    sys.exit(1)
def performSelfTest():
    """Verify the taglib dependency, then run a known-good tag lookup."""
    try:
        import taglib
    except:
        utilities.log(True, "Failed to import taglib. This grabber requires "
                      "pytaglib ? TagLib bindings for Python. "
                      "https://github.com/supermihi/pytaglib")
        sys.exit(1)
    lyrics = utilities.Lyrics()
    lyrics.source = __title__
    lyrics.syncronized = __syncronized__
    lyrics.artist = 'Robb Benson'
    lyrics.album = 'Demo Tracks'
    lyrics.title = 'Lone Rock'
    lyrics.filename = os.path.dirname(os.path.abspath(__file__)) + '/examples/taglyrics.mp3'
    if LyricsFetcher().get_lyrics(lyrics):
        utilities.log(True, "Everything appears in order.")
        sys.exit(0)
    utilities.log(True, "The lyrics for the test search failed!")
    sys.exit(1)
def get_lyrics(self, lyrics):
    """Query the ALSong XML service and fill lyrics.lyrics on a match.

    Posts the ALSONG_TMPL request, fuzzy-matches the returned artist and
    title against the request (ratio > 0.8 for both), and on success
    stores the UTF-8 encoded lyric text.  Returns True/False.
    """
    utilities.log(debug, "%s: searching lyrics for %s - %s - %s" % (
        __title__, lyrics.artist, lyrics.album, lyrics.title))
    try:
        hdrs = {'Content-Type': 'text/xml; charset=utf-8'}
        body = ALSONG_TMPL % (lyrics.title, lyrics.artist)
        reply = urllib2.urlopen(urllib2.Request(ALSONG_URL, body, hdrs))
        page = reply.read()
    except:
        return False
    doc = xml.parseString(page)
    try:
        found_artist = doc.getElementsByTagName(
            'strArtistName')[0].childNodes[0].data
        found_title = doc.getElementsByTagName(
            'strTitle')[0].childNodes[0].data
    except:
        return False
    # Guard against the service returning a different song.
    artist_ok = difflib.SequenceMatcher(
        None, lyrics.artist.lower(), found_artist.lower()).ratio() > 0.8
    title_ok = difflib.SequenceMatcher(
        None, lyrics.title.lower(), found_title.lower()).ratio() > 0.8
    if not (artist_ok and title_ok):
        return False
    text = doc.getElementsByTagName('strLyric')[0].childNodes[0].data
    lyrics.lyrics = text.replace('<br>', '\n').encode('utf-8')
    return True
def get_lyrics(self, lyrics):
    """Fetch synchronized (LRC) lyrics from the GOM lyrics service.

    Computes the GOM lookup key from the audio file, queries the service
    with EUC-KR encoded title/artist, converts each <sync> entry to an
    LRC timestamp line and joins the result into lyrics.lyrics.
    Returns True on success, False on any failure.
    """
    utilities.log(debug, "%s: searching lyrics for %s - %s - %s" % (
        __title__, lyrics.artist, lyrics.album, lyrics.title))
    key = None
    try:
        ext = os.path.splitext(lyrics.filename.decode("utf-8"))[1].lower()
        # Only these container formats are supported for key extraction.
        sup_ext = ['.mp3', '.ogg', '.wma', '.flac', '.ape', '.wav']
        if ext in sup_ext:
            key = gomClient.GetKeyFromFile(lyrics.filename)
        if not key:
            return False
        url = GOM_URL % (key,
                         urllib.quote(remove_accents(
                             lyrics.title.decode('utf-8')).encode('euc-kr')),
                         (remove_accents(
                             lyrics.artist.decode('utf-8')).encode('euc-kr')))
        response = urllib.urlopen(url)
        Page = response.read()
    except:
        utilities.log(True, "%s: %s::%s (%d) [%s]" % (
            __title__, self.__class__.__name__,
            sys.exc_info()[2].tb_frame.f_code.co_name,
            sys.exc_info()[2].tb_lineno,
            sys.exc_info()[1]))
        return False
    # result="0" means success; anything else is a miss.
    if Page[:Page.find('>') + 1] != '<lyrics_reply result="0">':
        return False
    syncs = re.compile('<sync start="(\d+)">([^<]*)</sync>').findall(Page)
    lyrline = []
    lyrline.append("[ti:%s]" % lyrics.title)
    lyrline.append("[ar:%s]" % lyrics.artist)
    for sync in syncs:
        # timeformat conversion (milliseconds -> mm:ss.hh)
        t = "%02d:%02d.%02d" % gomClient.mSecConv(int(sync[0]))
        # unescape string
        try:
            # BUGFIX: the entity names in the two replace() calls had been
            # HTML-decoded at some point, leaving broken string literals;
            # restore the intended &#39; -> ' and &#34; -> " unescaping.
            s = unicode(sync[1], "euc-kr").encode("utf-8").replace(
                "&#39;", "'").replace("&#34;", '"')
            lyrline.append("[%s]%s" % (t, s))
        except:
            pass
    lyrics.lyrics = '\n'.join(lyrline)
    return True
def get_lyrics(self, lyrics):
    """Search lyrdb.com karaoke pages for lyrics.artist / lyrics.title.

    Fuzzy-matches every search result (ratio > 0.8 on both artist and
    title); with several candidates the list is stored in lyrics.list,
    and the first candidate is downloaded.  Returns True on success.
    """
    utilities.log(debug, "%s: searching lyrics for %s - %s - %s" % (
        __title__, lyrics.artist, lyrics.album, lyrics.title))
    try:
        url = 'http://www.lyrdb.com/karaoke/?q=%s+%s&action=search' % (
            lyrics.artist.replace(' ', '+').lower(),
            lyrics.title.replace(' ', '+').lower())
        f = urllib.urlopen(url)
        Page = f.read()
    except:
        utilities.log(True, "%s: %s::%s (%d) [%s]" % (
            __title__, self.__class__.__name__,
            sys.exc_info()[2].tb_frame.f_code.co_name,
            sys.exc_info()[2].tb_lineno, sys.exc_info()[1]))
        return False
    links_query = re.compile(
        '<tr><td class="tresults"><a href="/karaoke/([0-9]+).htm">(.*?)</td><td class="tresults">(.*?)</td>'
    )
    urls = re.findall(links_query, Page)
    links = []
    for x in urls:
        # BUGFIX: the original compared against an undefined name `song`
        # (NameError at runtime); the request object is the `lyrics`
        # parameter.
        if (difflib.SequenceMatcher(None, lyrics.artist.lower(),
                                    x[2].lower()).ratio() > 0.8) and \
           (difflib.SequenceMatcher(None, lyrics.title.lower(),
                                    x[1].lower()).ratio() > 0.8):
            links.append((x[2] + ' - ' + x[1], x[0], x[2], x[1]))
    if len(links) == 0:
        return False
    elif len(links) > 1:
        # Expose the alternatives so the caller can offer a choice.
        lyrics.list = links
    lyr = self.get_lyrics_from_list(links[0])
    if not lyr:
        return False
    lyrics.lyrics = lyr
    return True
def getLyrics3(filename):
    #Get lyrics embed with Lyrics3/Lyrics3V2 format
    #See: http://id3.org/Lyrics3
    #http://id3.org/Lyrics3v2
    utilities.log(
        debug, "%s: trying %s" %
        (__title__, "lyrics embed with Lyrics3/Lyrics3V2 format"))
    f = File(filename)
    # The Lyrics3 block normally sits just before the 128-byte ID3v1 tag;
    # its final 9 bytes are "LYRICSEND" (v1) or "LYRICS200" (v2).
    f.seek(-128 - 9, os.SEEK_END)
    buf = f.read(9)
    if (buf != "LYRICS200" and buf != "LYRICSEND"):
        # No ID3v1 tag: look at the very end of the file instead.
        f.seek(-9, os.SEEK_END)
        buf = f.read(9)
    if (buf == "LYRICSEND"):
        """ Find Lyrics3v1 """
        # v1 payload is at most 5100 bytes, preceded by "LYRICSBEGIN".
        f.seek(-5100 - 9 - 11, os.SEEK_CUR)
        buf = f.read(5100 + 11)
        f.close()
        # NOTE(review): `start` is computed but the v1 payload is never
        # returned — the Lyrics3v1 extraction looks unfinished.
        start = buf.find("LYRICSBEGIN")
    elif (buf == "LYRICS200"):
        """ Find Lyrics3v2 """
        # v2 stores its own size as 6 ASCII digits before "LYRICS200".
        f.seek(-9 - 6, os.SEEK_CUR)
        size = int(f.read(6))
        f.seek(-size - 6, os.SEEK_CUR)
        buf = f.read(11)
        if (buf == "LYRICSBEGIN"):
            buf = f.read(size - 11)
            # NOTE(review): `tags` is never used.
            tags = []
            # Walk the field list: 3-char tag id + 5-digit decimal length.
            while buf != '':
                tag = buf[:3]
                length = int(buf[3:8])
                content = buf[8:8 + length]
                if (tag == 'LYR'):
                    # NOTE(review): returns without closing f.
                    return content
                buf = buf[8 + length:]
    f.close()
    return None
def getLyricsGeneric(filename):
    """Return embedded lyrics from any taglib-readable file, or None.

    Scans the file's tag dictionary for the first key starting with
    'LYRICS' and returns that tag's first value.  Returns None when
    pytaglib is missing, the file cannot be opened, or no lyrics tag
    exists.
    """
    try:
        import taglib
    except:
        utilities.log(
            True, "Failed to import taglib. This grabber requires "
            "pytaglib TagLib bindings for Python. "
            "https://github.com/supermihi/pytaglib")
        return None
    try:
        utilities.log(debug, "%s: trying to open %s" % (__title__, filename))
        f = taglib.File(filename)
        try:
            # see if we can find a lyrics tag
            for tag in f.tags:
                if tag.startswith('LYRICS'):
                    return f.tags[tag][0]
            return None
        finally:
            # BUGFIX: release the underlying file handle; the original
            # never closed the taglib.File object (leaked on every call).
            f.close()
    except:
        return None
def getLyrics3(filename):
    #Get lyrics embed with Lyrics3/Lyrics3V2 format
    #See: http://id3.org/Lyrics3
    #http://id3.org/Lyrics3v2
    utilities.log(debug, "%s: trying %s" % (
        __title__, "lyrics embed with Lyrics3/Lyrics3V2 format"))
    f = File(filename)
    # The Lyrics3 block normally sits just before the 128-byte ID3v1 tag;
    # its final 9 bytes are "LYRICSEND" (v1) or "LYRICS200" (v2).
    f.seek(-128-9, os.SEEK_END)
    buf = f.read(9)
    if (buf != "LYRICS200" and buf != "LYRICSEND"):
        # No ID3v1 tag: look at the very end of the file instead.
        f.seek(-9, os.SEEK_END)
        buf = f.read(9)
    if (buf == "LYRICSEND"):
        """ Find Lyrics3v1 """
        # v1 payload is at most 5100 bytes, preceded by "LYRICSBEGIN".
        f.seek(-5100-9-11, os.SEEK_CUR)
        buf = f.read(5100+11)
        f.close();
        # NOTE(review): `start` is computed but the v1 payload is never
        # returned — the Lyrics3v1 extraction looks unfinished.
        start = buf.find("LYRICSBEGIN")
    elif (buf == "LYRICS200"):
        """ Find Lyrics3v2 """
        # v2 stores its own size as 6 ASCII digits before "LYRICS200".
        f.seek(-9-6, os.SEEK_CUR)
        size = int(f.read(6))
        f.seek(-size-6, os.SEEK_CUR)
        buf = f.read(11)
        if(buf == "LYRICSBEGIN"):
            buf = f.read(size-11)
            # NOTE(review): `tags` is never used.
            tags=[]
            # Walk the field list: 3-char tag id + 5-digit decimal length.
            while buf!= '':
                tag = buf[:3]
                length = int(buf[3:8])
                content = buf[8:8+length]
                if (tag == 'LYR'):
                    # NOTE(review): returns without closing f.
                    return content
                buf = buf[8+length:]
    f.close();
    return None
def performSelfTest():
    """Smoke-test this grabber with a well-known track; exits the process.

    Exit status 0 on success, 1 when python-bs4 is missing or the fetch
    fails.
    """
    try:
        from bs4 import BeautifulSoup
    except:
        utilities.log(
            True,
            "Failed to import BeautifulSoup. This grabber requires python-bs4")
        sys.exit(1)
    # Fixed, well-known test track.
    lyrics = utilities.Lyrics()
    lyrics.source = __title__
    lyrics.syncronized = __syncronized__
    lyrics.artist = 'Dire Straits'
    lyrics.album = 'Brothers In Arms'
    lyrics.title = 'Money For Nothing'
    if LyricsFetcher().get_lyrics(lyrics):
        utilities.log(True, "Everything appears in order.")
        sys.exit(0)
    utilities.log(True, "The lyrics for the test search failed!")
    sys.exit(1)
def get_lyrics(self, lyrics):
    """Search the Genius API for the track and scrape its lyrics page.

    First queries the search endpoint (self.url), fuzzy-matches the top
    hit's artist/title (ratio > 0.8 for both), then fetches the hit's
    page and strips the lyrics <div> of HTML tags.  Fills lyrics.lyrics
    and returns True on success, False otherwise.
    """
    utilities.log(debug, "%s: searching lyrics for %s - %s - %s" % (
        __title__, lyrics.artist, lyrics.album, lyrics.title))
    try:
        request = Request(
            self.url % (quote(lyrics.artist), '%20', quote(lyrics.title)))
        request.add_header(
            'User-Agent',
            'Mozilla/5.0 (Windows NT 10.0; rv:77.0) Gecko/20100101 Firefox/77.0'
        )
        req = urlopen(request)
        response = req.read().decode('utf-8')
    except:
        return False
    req.close()
    data = simplejson.loads(response)
    try:
        name = data['response']['hits'][0]['result']['primary_artist'][
            'name']
        track = data['response']['hits'][0]['result']['title']
        # Fuzzy-match artist and title to avoid grabbing the wrong song.
        if (difflib.SequenceMatcher(None, lyrics.artist.lower(),
                                    name.lower()).ratio() > 0.8) and \
           (difflib.SequenceMatcher(None, lyrics.title.lower(),
                                    track.lower()).ratio() > 0.8):
            self.page = data['response']['hits'][0]['result']['url']
        else:
            # BUGFIX: was `return None`; use False for a failure value
            # consistent with every other exit (both are falsy, so
            # truthiness-based callers are unaffected).
            return False
    except:
        return False
    utilities.log(debug, "%s: search url: %s" % (__title__, self.page))
    try:
        request = Request(self.page)
        request.add_header(
            'User-Agent',
            'Mozilla/5.0 (Windows NT 10.0; rv:77.0) Gecko/20100101 Firefox/77.0'
        )
        req = urlopen(request)
        response = req.read()
    except:
        return False
    req.close()
    try:
        htmlparser = html_parser.HTMLParser()
        response = htmlparser.unescape(response.decode('utf-8'))
    except:
        # may be necessary for python 3.10 (HTMLParser.unescape removed)
        response = html.unescape(response.decode('utf-8'))
    matchcode = re.search(u'<div class="[lL]yrics.*?">(.*?)</div>',
                          response, flags=re.DOTALL)
    try:
        lyricscode = (matchcode.group(1))
        # Strip remaining html tags, then convert escaped newlines.
        lyr = re.sub('<[^<]+?>', '', lyricscode)
        lyrics.lyrics = lyr.replace('\\n', '\n').strip()
        return True
    except:
        return False
def getMP4Lyrics(filename):
    """Extract embedded lyrics from an MP4 file.

    MP4 currently needs no special handling, so this simply delegates to
    the taglib-based generic extractor.
    """
    utilities.log(debug, "%s: trying %s" % (
        __title__, "lyrics embed with MP4 format"))
    # just use the generic taglib method for now
    return getLyricsGeneric(filename)
def get_lyrics(self, lyrics):
    # Scrape lyricsmode.com for the requested song: try a direct URL built
    # from artist/title first, then fall back to the site's search box,
    # following result pages, until the lyrics markup is found.
    # Returns True and sets lyrics.lyrics on success, False otherwise.
    utilities.log(
        debug, "%s: searching lyrics for %s - %s - %s" %
        (__title__, lyrics.artist, lyrics.album, lyrics.title))
    artist = utilities.deAccent(lyrics.artist)
    title = utilities.deAccent(lyrics.title)
    try:
        # below is borowed from XBMC Lyrics
        url = "http://www.lyricsmode.com/lyrics/%s/%s/%s.html" % (
            artist.lower()[:1], artist.lower().replace("&", "and").replace(
                " ", "_"), title.lower().replace("&", "and").replace(
                    " ", "_"))
        lyrics_found = False
        while True:
            utilities.log(debug, "%s: search url: %s" % (__title__, url))
            song_search = urllib.urlopen(url).read()
            # Found the lyrics container? Then stop following links.
            if song_search.find(
                    "<p id=\"lyrics_text\" class=\"ui-annotatable\">"
            ) >= 0:
                break
            if lyrics_found:
                # if we're here, we found the lyrics page but it didn't
                # contains the lyrics part (licensing issue or some bug)
                return False
            # Let's try to use the research box if we didn't yet
            if not 'search' in url:
                url = "http://www.lyricsmode.com/search.php?what=songs&s=" + urllib.quote_plus(
                    title.lower())
            else:
                # the search gave more than on result, let's try to find our song
                url = ""
                start = song_search.find('<!--output-->')
                end = song_search.find('<!--/output-->', start)
                results = self.search_results_regex.findall(
                    song_search, start, end)
                for result in results:
                    if result[0].lower() in artist.lower():
                        url = "http://www.lyricsmode.com" + result[1]
                        lyrics_found = True
                        break
                if not url:
                    # Is there a next page of results ?
                    match = self.next_results_regex.search(
                        song_search[end:])
                    if match:
                        url = "http://www.lyricsmode.com/search.php" + match.group(
                            1)
                    else:
                        return False
        # Cut the lyrics body out of the page, then clean the markup.
        lyr = song_search.split(
            "<p id=\"lyrics_text\" class=\"ui-annotatable\">")[1].split(
                '</p><p id=\"lyrics_text_selected\">')[0]
        lyr = self.clean_br_regex.sub("\n", lyr).strip()
        lyr = self.clean_lyrics_regex.sub("", lyr).strip()
        # Decode numeric character references into real characters.
        lyr = self.normalize_lyrics_regex.sub(
            lambda m: unichr(int(m.group(1))), lyr.decode("ISO-8859-1"))
        lir = []
        for line in lyr.splitlines():
            # NOTE(review): strip() result is discarded — likely meant
            # `line = line.strip()`; as written lines keep whitespace.
            line.strip()
            if line.find("Lyrics from:") < 0:
                lir.append(line)
        lyr = u"\n".join(lir)
        if lyr.startswith('These lyrics are missing'):
            return False
        lyrics.lyrics = lyr
        return True
    except:
        utilities.log(
            True, "%s: %s::%s (%d) [%s]" %
            (__title__, self.__class__.__name__,
             sys.exc_info()[2].tb_frame.f_code.co_name,
             sys.exc_info()[2].tb_lineno, sys.exc_info()[1]))
        return False
def getMP4Lyrics(filename):
    """Lyrics embedded in an MP4 container.

    Defers to the taglib-based generic extractor, which handles MP4
    tags without any format-specific code.
    """
    utilities.log(debug,
                  "%s: trying %s" % (__title__,
                                     "lyrics embed with MP4 format"))
    # just use the generic taglib method for now
    return getLyricsGeneric(filename)
def get_lyrics(self, lyrics):
    # Scrape lyricsmode.com for the requested song: try a direct URL built
    # from artist/title first, then fall back to the site's search box,
    # following result pages, until the lyrics markup is found.
    # Returns True and sets lyrics.lyrics on success, False otherwise.
    utilities.log(debug, "%s: searching lyrics for %s - %s - %s" % (
        __title__, lyrics.artist, lyrics.album, lyrics.title))
    artist = utilities.deAccent(lyrics.artist)
    title = utilities.deAccent(lyrics.title)
    try:
        # below is borowed from XBMC Lyrics
        url = "http://www.lyricsmode.com/lyrics/%s/%s/%s.html" % (
            artist.lower()[:1],
            artist.lower().replace("&","and").replace(" ","_"),
            title.lower().replace("&","and").replace(" ","_"))
        lyrics_found = False
        while True:
            utilities.log(debug, "%s: search url: %s" % (__title__, url))
            song_search = urllib.urlopen(url).read()
            # Found the lyrics container? Then stop following links.
            if song_search.find("<p id=\"lyrics_text\" class=\"ui-annotatable\">") >= 0:
                break
            if lyrics_found:
                # if we're here, we found the lyrics page but it didn't
                # contains the lyrics part (licensing issue or some bug)
                return False
            # Let's try to use the research box if we didn't yet
            if not 'search' in url:
                url = "http://www.lyricsmode.com/search.php?what=songs&s=" + urllib.quote_plus(title.lower())
            else:
                # the search gave more than on result, let's try to find our song
                url = ""
                start = song_search.find('<!--output-->')
                end = song_search.find('<!--/output-->', start)
                results = self.search_results_regex.findall(song_search, start, end)
                for result in results:
                    if result[0].lower() in artist.lower():
                        url = "http://www.lyricsmode.com" + result[1]
                        lyrics_found = True
                        break
                if not url:
                    # Is there a next page of results ?
                    match = self.next_results_regex.search(song_search[end:])
                    if match:
                        url = "http://www.lyricsmode.com/search.php" + match.group(1)
                    else:
                        return False
        # Cut the lyrics body out of the page, then clean the markup.
        lyr = song_search.split("<p id=\"lyrics_text\" class=\"ui-annotatable\">")[1].split('</p><p id=\"lyrics_text_selected\">')[0]
        lyr = self.clean_br_regex.sub( "\n", lyr ).strip()
        lyr = self.clean_lyrics_regex.sub( "", lyr ).strip()
        # Decode numeric character references into real characters.
        lyr = self.normalize_lyrics_regex.sub(
            lambda m: unichr( int( m.group( 1 ) ) ), lyr.decode("ISO-8859-1") )
        lir = []
        for line in lyr.splitlines():
            # NOTE(review): strip() result is discarded — likely meant
            # `line = line.strip()`; as written lines keep whitespace.
            line.strip()
            if line.find("Lyrics from:") < 0:
                lir.append(line)
        lyr = u"\n".join( lir )
        if lyr.startswith('These lyrics are missing'):
            return False
        lyrics.lyrics = lyr
        return True
    except:
        utilities.log(True, "%s: %s::%s (%d) [%s]" % (
            __title__, self.__class__.__name__,
            sys.exc_info()[ 2 ].tb_frame.f_code.co_name,
            sys.exc_info()[ 2 ].tb_lineno,
            sys.exc_info()[ 1 ] ))
        return False