def parse_Mp3skull(song, maxpages=1):
    """Search mp3skull.com for *song* and yield the .mp3 links found there.

    song     -- search string (unicode); Hebrew strings are skipped because
                the source carries no Hebrew songs.
    maxpages -- upper bound on the number of page requests.

    Yields one utils.cls.MetaUrl(link, 'Mp3skull') per anchor whose href
    ends with ".mp3". Stops early as soon as a page yields no links.
    Errors raised by urllib2.urlopen are propagated to the caller.
    """
    if utils.isHebrew(song):
        # Mp3skull doesn't have hebrew songs
        log.warning("[Mp3skull] source has no hebrew songs. Skipping...")
        return

    # Build the slug BEFORE percent-encoding. Once quoted, every space has
    # already become "%20", so replacing ' ' afterwards never matches and
    # the URL would not look like the example below.
    slug = song.replace('-', '').replace(' ', '_').replace('__', '_')
    slug = urllib2.quote(slug.encode("utf8")).lower()

    # http://mp3skull.com/mp3/how_i_met_your_mother.html
    url = 'http://mp3skull.com/mp3/%s.html' % slug
    mp3_href = re.compile(r'\.mp3$')

    # NOTE: the URL carries no page number, so every iteration requests the
    # same address; maxpages only bounds how many times that happens.
    for i in range(maxpages):
        log.debug("[Mp3skull] Parsing %s... " % url)
        obj = urllib2.urlopen(url)
        try:
            response = obj.read()
        finally:
            # Release the connection even when read() fails.
            obj.close()

        soup = BeautifulSoup(response)
        links = [anchor['href'] for anchor in soup.find_all('a', href=mp3_href)]
        log.debug("[Mp3skull] found %d links" % len(links))

        if not links:
            break

        for link in links:
            yield utils.cls.MetaUrl(link, 'Mp3skull')
def parse_Mp3skull(song, maxpages=1):
    """Search mp3skull.com for *song* and yield the .mp3 links found there.

    song     -- search string (unicode); Hebrew strings are skipped because
                the source carries no Hebrew songs.
    maxpages -- upper bound on the number of page requests.

    Yields one utils.cls.MetaUrl(link, 'Mp3skull') per anchor whose href
    ends with ".mp3". Stops early as soon as a page yields no links.
    Errors raised by urllib2.urlopen are propagated to the caller.
    """
    if utils.isHebrew(song):
        # Mp3skull doesn't have hebrew songs
        log.warning("[Mp3skull] source has no hebrew songs. Skipping...")
        return

    # Build the slug BEFORE percent-encoding. Once quoted, every space has
    # already become "%20", so replacing ' ' afterwards never matches and
    # the URL would not look like the example below.
    slug = song.replace('-', '').replace(' ', '_').replace('__', '_')
    slug = urllib2.quote(slug.encode("utf8")).lower()

    # http://mp3skull.com/mp3/how_i_met_your_mother.html
    url = 'http://mp3skull.com/mp3/%s.html' % slug
    mp3_href = re.compile(r'\.mp3$')

    # NOTE: the URL carries no page number, so every iteration requests the
    # same address; maxpages only bounds how many times that happens.
    for i in range(maxpages):
        log.debug("[Mp3skull] Parsing %s... " % url)
        obj = urllib2.urlopen(url)
        try:
            response = obj.read()
        finally:
            # Release the connection even when read() fails.
            obj.close()

        soup = BeautifulSoup(response)
        links = [anchor['href'] for anchor in soup.find_all('a', href=mp3_href)]
        log.debug("[Mp3skull] found %d links" % len(links))

        if not links:
            break

        for link in links:
            yield utils.cls.MetaUrl(link, 'Mp3skull')
def slot_apply(self):
    """Snapshot the dialog's fields into self.ID3TagsToEdit, then close it.

    Keys written: TIT2, TPE1, TALB, TDRC, COMM, USLT and, depending on
    self.albumArt_task, APIC. An empty string stored under USLT or APIC
    stands for "no frame of this kind".
    """
    self.ID3TagsToEdit = {}
    tags = self.ID3TagsToEdit
    log.debug("Saving ID3 Data in cache...")

    # Text frames that are filled straight from a widget's displayed text.
    text_frames = (
        ('TIT2', TIT2, self.title),
        ('TPE1', TPE1, self.artist),
        ('TALB', TALB, self.album),
        ('TDRC', TDRC, self.date),
    )
    for key, frame_cls, widget in text_frames:
        tags[key] = frame_cls(encoding=3, text=unicode(widget.displayText()))

    tags['COMM'] = COMM(encoding=3, text=unicode(config.id3tags_whitemark),
                        desc='')

    # Collapse CRLF to LF first so that every line break ends up as exactly
    # one CRLF, whatever the widget produced.
    raw_lyrics = unicode(self.lyrics.toPlainText())
    lyrics = raw_lyrics.replace('\r\n', '\n').replace('\n', '\r\n')
    if not lyrics:
        tags['USLT'] = ''
    else:
        language = u'heb' if utils.isHebrew(lyrics) else u'eng'
        tags['USLT'] = USLT(encoding=3, lang=language, desc=u'', text=lyrics)

    if self.albumArt_task == 'add' and self.pix_path:
        with open(self.pix_path, 'rb') as image_file:
            data = image_file.read()
        mime = utils.guess_image_mime_type(self.pix_path)
        tags['APIC'] = APIC(encoding=0, mime=mime, type=0, desc="", data=data)
    elif self.albumArt_task == 'delete':
        # An empty value doesn't add a new APIC tag. All other APIC tags
        # are removed prior to insertion.
        tags['APIC'] = ''

    self.close()
def run(self):
    """Look up a track by a lyrics fragment and emit it on self.output.

    Called by Qt once the thread environment has been set up.
    Hebrew text is searched on shironet; anything else is searched on
    songlyrics first, with animelyrics as a fallback when that finds nothing.
    """
    is_hebrew = utils.isHebrew(self.lyric)
    grabber = Main.WebParser.MetadataGrabber

    if is_hebrew:
        track = grabber.parse_shironet_songs_by_lyrics(self.lyric)
    else:
        # `or` only evaluates the fallback when the first lookup is falsy.
        track = (grabber.parse_songlyrics_songs_by_lyrics(self.lyric)
                 or grabber.parse_animelyrics_songs_by_lyrics(self.lyric))

    self.output.emit(track)
def old_spell_fix(s):
    """Use Google's spell-check web API to fix the spelling of *s*.

    s -- the search string (unicode).

    Returns the corrected string, or *s* unchanged when the string is
    Hebrew, the response cannot be parsed, a flagged word comes without
    suggestions, or no suggestion is confident.
    Errors raised while talking to the server are propagated.
    """
    # Local import keeps this block self-contained.
    from xml.sax.saxutils import escape

    if log:
        log.debug("Checking spell suggestions for '%s'..." % s)

    if utils.isHebrew(s):
        if log:
            log.debug("Search string is hebrew. Skipping on spell checking...")
        return s

    # Built only after the Hebrew short-circuit. The text is XML-escaped so
    # that '&', '<' or '>' in the query cannot produce a malformed request.
    data = u"""
    <spellrequest textalreadyclipped="0" ignoredups="1" ignoredigits="1" ignoreallcaps="0">
    <text>%s</text>
    </spellrequest>
    """
    data_octets = (data % escape(s)).encode('utf-8')

    con = httplib.HTTPConnection("www.google.com",
                                 timeout=config.webservices_timeout)
    try:
        con.request("POST", "/tbproxy/spell?lang=en", data_octets,
                    {'content-type': 'text/xml; charset=utf-8'})
        response = con.getresponse().read()
    finally:
        # Always release the socket, including on request/read errors.
        con.close()

    if log:
        log.debug("Response: %s" % response)

    try:
        dom = xml.dom.minidom.parseString(response)
    except ExpatError:
        if log:
            log.warning('spell_fix failed: ExpatError.')
        return s

    results = dom.getElementsByTagName('spellresult')
    if not results:
        # Well-formed XML, but not the document we expected.
        if log:
            log.warning('spell_fix failed: no spellresult element.')
        return s

    new_s = s
    for node in results[0].childNodes:
        if node.nodeType != node.ELEMENT_NODE:
            # Whitespace/text nodes carry no attributes.
            continue

        # Read attributes by name instead of by position: the order of
        # attributes.item(i) depends on the attribute map's internal order.
        # NOTE(review): names 'o'/'l'/'s' follow the service's response
        # format as I understand it -- confirm against a real response.
        att_o = int(node.getAttribute('o'))  # The offset from the start of the text of the word
        att_l = int(node.getAttribute('l'))  # Length of misspelled word
        att_s = int(node.getAttribute('s'))  # Confidence of the suggestion

        if not node.firstChild:
            # No suggestions for this word: give up and return the input
            # untouched (original behaviour, kept as is).
            return s

        # Suggestions are tab-separated; take the first one.
        text = node.firstChild.data.split("\t")[0]
        if att_s:  # if suggestion is confident
            new_s = new_s.replace(s[att_o:att_o + att_l], text)

    if log:
        if s == new_s:
            log.debug("No suggestions were accepted.")
        else:
            log.debug("Suggestions were accepted: %s --> %s." % (s, new_s))

    return new_s
def run(self):
    """Search for artists matching self.artist and emit the outcome.

    Called by Qt once the thread environment has been set up.
    Hebrew names are searched on shironet, all others on musicbrainz.
    Emits self.error with a translated message when nothing is found,
    otherwise emits the result on self.output.
    """
    grabber = Main.WebParser.MetadataGrabber
    search = (grabber.shironet_artist_search
              if utils.isHebrew(self.artist)
              else grabber.musicbrainz_artist_search)

    artists = search(self.artist)
    if artists:
        self.output.emit(artists)
    else:
        self.error.emit(tr("No data was found."))
def old_spell_fix(s):
    """Use Google's spell-check web API to fix the spelling of *s*.

    s -- the search string (unicode).

    Returns the corrected string, or *s* unchanged when the string is
    Hebrew, the response cannot be parsed, a flagged word comes without
    suggestions, or no suggestion is confident.
    Errors raised while talking to the server are propagated.
    """
    # Local import keeps this block self-contained.
    from xml.sax.saxutils import escape

    if log:
        log.debug("Checking spell suggestions for '%s'..." % s)

    if utils.isHebrew(s):
        if log:
            log.debug("Search string is hebrew. Skipping on spell checking...")
        return s

    # Built only after the Hebrew short-circuit. The text is XML-escaped so
    # that '&', '<' or '>' in the query cannot produce a malformed request.
    data = u"""
    <spellrequest textalreadyclipped="0" ignoredups="1" ignoredigits="1" ignoreallcaps="0">
    <text>%s</text>
    </spellrequest>
    """
    data_octets = (data % escape(s)).encode('utf-8')

    con = httplib.HTTPConnection("www.google.com",
                                 timeout=config.webservices_timeout)
    try:
        con.request("POST", "/tbproxy/spell?lang=en", data_octets,
                    {'content-type': 'text/xml; charset=utf-8'})
        response = con.getresponse().read()
    finally:
        # Always release the socket, including on request/read errors.
        con.close()

    if log:
        log.debug("Response: %s" % response)

    try:
        dom = xml.dom.minidom.parseString(response)
    except ExpatError:
        if log:
            log.warning('spell_fix failed: ExpatError.')
        return s

    results = dom.getElementsByTagName('spellresult')
    if not results:
        # Well-formed XML, but not the document we expected.
        if log:
            log.warning('spell_fix failed: no spellresult element.')
        return s

    new_s = s
    for node in results[0].childNodes:
        if node.nodeType != node.ELEMENT_NODE:
            # Whitespace/text nodes carry no attributes.
            continue

        # Read attributes by name instead of by position: the order of
        # attributes.item(i) depends on the attribute map's internal order.
        # NOTE(review): names 'o'/'l'/'s' follow the service's response
        # format as I understand it -- confirm against a real response.
        att_o = int(node.getAttribute('o'))  # The offset from the start of the text of the word
        att_l = int(node.getAttribute('l'))  # Length of misspelled word
        att_s = int(node.getAttribute('s'))  # Confidence of the suggestion

        if not node.firstChild:
            # No suggestions for this word: give up and return the input
            # untouched (original behaviour, kept as is).
            return s

        # Suggestions are tab-separated; take the first one.
        text = node.firstChild.data.split("\t")[0]
        if att_s:  # if suggestion is confident
            new_s = new_s.replace(s[att_o:att_o + att_l], text)

    if log:
        if s == new_s:
            log.debug("No suggestions were accepted.")
        else:
            log.debug("Suggestions were accepted: %s --> %s." % (s, new_s))

    return new_s
def slot_apply(self):
    """Snapshot the dialog's fields into self.ID3TagsToEdit, then close it.

    Keys written: TIT2, TPE1, TALB, TDRC, COMM, USLT and, depending on
    self.albumArt_task, APIC. An empty string stored under USLT or APIC
    stands for "no frame of this kind".
    """
    self.ID3TagsToEdit = {}
    tags = self.ID3TagsToEdit
    log.debug("Saving ID3 Data in cache...")

    # Text frames that are filled straight from a widget's displayed text.
    text_frames = (
        ('TIT2', TIT2, self.title),
        ('TPE1', TPE1, self.artist),
        ('TALB', TALB, self.album),
        ('TDRC', TDRC, self.date),
    )
    for key, frame_cls, widget in text_frames:
        tags[key] = frame_cls(encoding=3, text=unicode(widget.displayText()))

    tags['COMM'] = COMM(encoding=3, text=unicode(config.id3tags_whitemark),
                        desc='')

    # Collapse CRLF to LF first so that every line break ends up as exactly
    # one CRLF, whatever the widget produced.
    raw_lyrics = unicode(self.lyrics.toPlainText())
    lyrics = raw_lyrics.replace('\r\n', '\n').replace('\n', '\r\n')
    if not lyrics:
        tags['USLT'] = ''
    else:
        language = u'heb' if utils.isHebrew(lyrics) else u'eng'
        tags['USLT'] = USLT(encoding=3, lang=language, desc=u'', text=lyrics)

    if self.albumArt_task == 'add' and self.pix_path:
        with open(self.pix_path, 'rb') as image_file:
            data = image_file.read()
        mime = utils.guess_image_mime_type(self.pix_path)
        tags['APIC'] = APIC(encoding=0, mime=mime, type=0, desc="", data=data)
    elif self.albumArt_task == 'delete':
        # An empty value doesn't add a new APIC tag. All other APIC tags
        # are removed prior to insertion.
        tags['APIC'] = ''

    self.close()
def parse_MusicAddict(song, maxpages=10):
    """Search MusicAddict.com for *song* and yield the .mp3 links found there.

    song     -- search string (unicode); Hebrew strings are skipped because
                the source carries no Hebrew songs.
    maxpages -- maximum number of result pages to walk (page-1 .. page-N).

    For every result page, each 'dl_link' span is followed to its detail
    page, the "js3/<digits>.js" script referenced there is downloaded, and
    the first src="..." value inside it is yielded as
    utils.cls.MetaUrl(url, 'MusicAddict'). Paging stops at the first page
    that produces no link. Errors raised by urllib2.urlopen are propagated.
    """
    DOMAIN = 'http://www.musicaddict.com/'

    if utils.isHebrew(song):
        # MusicAddict doesn't have hebrew songs
        log.warning("[MusicAddict] source has no hebrew songs. Skipping...")
        return

    def fetch(address):
        # Read the whole body and always release the connection.
        handle = urllib2.urlopen(address)
        try:
            return handle.read()
        finally:
            handle.close()

    # Build the slug BEFORE percent-encoding. Once quoted, every space has
    # already become "%20", so replacing ' ' afterwards never matches and
    # the URL would not look like the example below.
    slug = song.replace('-', '').replace('_', '').replace(' ', '-')
    slug = urllib2.quote(slug.encode("utf8")).lower()
    script_src = re.compile(r"js3/\d+.js")

    for i in range(maxpages):
        # http://www.musicaddict.com/mp3/naruto-shippuden/page-1.html
        url = 'http://www.musicaddict.com/mp3/%s/page-%d.html' % (slug, i + 1)
        log.debug("[MusicAddict] Parsing %s... " % url)
        soup = BeautifulSoup(fetch(url))

        # Collect detail-page addresses. Each href gets its own variable so
        # an absolute link can never pick up a stale value from `url`.
        t_links = []
        for span in soup.find_all('span', class_='dl_link'):
            anchor = span.a
            if anchor is None:
                continue
            href = anchor['href']
            if not href.startswith('http'):
                href = DOMAIN + href
            t_links.append(href)

        links = []
        for link in t_links:
            js = BeautifulSoup(fetch(link)).find('script', src=script_src)
            if js is None:
                # Detail page without the expected script: nothing to extract.
                log.debug("[MusicAddict] no js3 script in %s" % link)
                continue

            match = re.search('src="(.+?)"', fetch(DOMAIN + js['src']))
            if match is None:
                log.debug("[MusicAddict] no src found for %s" % link)
                continue

            mp3_url = match.group(1)
            links.append(mp3_url)
            yield utils.cls.MetaUrl(mp3_url, 'MusicAddict')

        if not links:
            break
def parse(song, artist):
    """Search the web for song lyrics. Returns a generator.

    song, artist -- strings describing the track; *artist* may be empty.

    Hebrew input (in either argument) is looked up on shironet only.
    Otherwise the sources are tried in this order, yielding every result:
      1. LyricsMode with the names as given;
      2. LyricsMode again, if cleaning the names changed anything;
      3. OnlyLyrics;
      4. OnlyLyrics with a two-word artist name flipped;
      5. ChartLyrics.
    A socket.error from OnlyLyrics or ChartLyrics ends the generator quietly.
    """
    s = '%s - %s' % (artist.strip(), song.strip())

    if utils.isHebrew(song) or utils.isHebrew(artist):
        log.debug("Grabbing lyrics for %s from shironet.co.il..." % s)
        for lyrics in parse_shironet(s):
            yield lyrics
        # The remaining sources are for non-Hebrew input only.
        return

    # if english
    log.debug("Grabbing lyrics for %s from LyricsMode..." % song)
    for lyrics in parse_LyricsMode(song, artist):
        yield lyrics

    # lets try trim the ()'s or []'s
    _song = song
    _artist = artist
    song = utils.trim_between(_song)
    song = utils.trim_between(song, '[', ']')
    if song != _song:
        log.debug("Trimming %s --> %s" % (_song, song))
    artist = utils.trim_between(_artist)
    artist = utils.trim_between(artist, '[', ']')
    if artist != _artist:
        log.debug("Trimming %s --> %s" % (_artist, artist))

    # The following situation may happen:
    #   >>> song = "Train - 50 Ways To Say Goodbye"
    #   >>> artist = "Train"
    # so drop the artist's name from the title when it is embedded there.
    x, y = utils.parse_title_from_filename(song)
    if artist:
        if artist.lower() == x.lower():
            log.debug("Trimming %s --> %s" % (song, y))
            song = y
        if artist.lower() == y.lower():
            log.debug("Trimming %s --> %s" % (song, x))
            song = x
    else:
        log.debug("Setting artist name from nothing to %s" % y)
        song, artist = x, y

    # Ask LyricsMode again only when the clean-up changed something.
    if artist != _artist or song != _song:
        log.debug("Grabbing lyrics for %s from LyricsMode..." % song)
        for lyrics in parse_LyricsMode(song, artist):
            yield lyrics

    # The try blocks below span the iteration, not only the call: if the
    # parsers are lazy generators (presumably -- they are only iterated
    # here), a socket.error surfaces while iterating, not when created.
    log.debug("Grabbing lyrics for %s from OnlyLyrics..." % song)
    try:
        for lyrics in parse_onlylyrics(song, artist):
            yield lyrics
    except socket.error:
        return

    # Split on any whitespace for both the test and the flip, so the two
    # always agree on what counts as a word.
    name_parts = artist.split()
    if len(name_parts) == 2:
        flipped_artist = " ".join(reversed(name_parts))
        log.debug("Grabbing lyrics for %s from OnlyLyrics (flipping last and first name)..." % song)
        try:
            for lyrics in parse_onlylyrics(song, flipped_artist):
                yield lyrics
        except socket.error:
            return

    log.debug("Grabbing lyrics for %s from ChartLyrics..." % song)
    try:
        for lyrics in parse_ChartLyrics(song, artist):
            yield lyrics
    except socket.error:
        return
def parse(song, artist):
    """Search the web for song lyrics. Returns a generator.

    song, artist -- strings describing the track; *artist* may be empty.

    Hebrew input (in either argument) is looked up on shironet only.
    Otherwise the sources are tried in this order, yielding every result:
      1. LyricsMode with the names as given;
      2. LyricsMode again, if cleaning the names changed anything;
      3. OnlyLyrics;
      4. OnlyLyrics with a two-word artist name flipped;
      5. ChartLyrics.
    A socket.error from OnlyLyrics or ChartLyrics ends the generator quietly.
    """
    s = '%s - %s' % (artist.strip(), song.strip())

    if utils.isHebrew(song) or utils.isHebrew(artist):
        log.debug("Grabbing lyrics for %s from shironet.co.il..." % s)
        for lyrics in parse_shironet(s):
            yield lyrics
        # The remaining sources are for non-Hebrew input only.
        return

    # if english
    log.debug("Grabbing lyrics for %s from LyricsMode..." % song)
    for lyrics in parse_LyricsMode(song, artist):
        yield lyrics

    # lets try trim the ()'s or []'s
    _song = song
    _artist = artist
    song = utils.trim_between(_song)
    song = utils.trim_between(song, '[', ']')
    if song != _song:
        log.debug("Trimming %s --> %s" % (_song, song))
    artist = utils.trim_between(_artist)
    artist = utils.trim_between(artist, '[', ']')
    if artist != _artist:
        log.debug("Trimming %s --> %s" % (_artist, artist))

    # The following situation may happen:
    #   >>> song = "Train - 50 Ways To Say Goodbye"
    #   >>> artist = "Train"
    # so drop the artist's name from the title when it is embedded there.
    x, y = utils.parse_title_from_filename(song)
    if artist:
        if artist.lower() == x.lower():
            log.debug("Trimming %s --> %s" % (song, y))
            song = y
        if artist.lower() == y.lower():
            log.debug("Trimming %s --> %s" % (song, x))
            song = x
    else:
        log.debug("Setting artist name from nothing to %s" % y)
        song, artist = x, y

    # Ask LyricsMode again only when the clean-up changed something.
    if artist != _artist or song != _song:
        log.debug("Grabbing lyrics for %s from LyricsMode..." % song)
        for lyrics in parse_LyricsMode(song, artist):
            yield lyrics

    # The try blocks below span the iteration, not only the call: if the
    # parsers are lazy generators (presumably -- they are only iterated
    # here), a socket.error surfaces while iterating, not when created.
    log.debug("Grabbing lyrics for %s from OnlyLyrics..." % song)
    try:
        for lyrics in parse_onlylyrics(song, artist):
            yield lyrics
    except socket.error:
        return

    # Split on any whitespace for both the test and the flip, so the two
    # always agree on what counts as a word.
    name_parts = artist.split()
    if len(name_parts) == 2:
        flipped_artist = " ".join(reversed(name_parts))
        log.debug("Grabbing lyrics for %s from OnlyLyrics (flipping last and first name)..." % song)
        try:
            for lyrics in parse_onlylyrics(song, flipped_artist):
                yield lyrics
        except socket.error:
            return

    log.debug("Grabbing lyrics for %s from ChartLyrics..." % song)
    try:
        for lyrics in parse_ChartLyrics(song, artist):
            yield lyrics
    except socket.error:
        return