def search(self, artist, files=None):
    """Run a search for *artist* and return a list of
    (album-info, tracks) candidates parsed from the search page.

    files -- optional list of audio files; when self.searchby is set,
        the query is formatted from the first file's tags instead of
        *artist*.
    """
    # Build the query string, then collapse runs of whitespace into the
    # source's configured separator.
    if files is not None and self.searchby:
        query = format_value(files[0], self.searchby)
    else:
        query = artist
    query = re.sub(r'\s+', self._separator, query)

    # No search source configured: fetch the album page directly.
    if self.search_source is None:
        found = self.retrieve(query)
        return [found] if found else []

    search_url = self._search_base.replace('%s', query)
    write_log(translate('Mp3tag', 'Retrieving search page: %s') % search_url)
    set_status(translate('Mp3tag', 'Retrieving search page...'))
    # Pre-supplied HTML (self.html) skips the network fetch; the
    # conditional expression keeps urlopen lazy, as in the two-branch form.
    raw = urlopen(search_url) if self.html is None else self.html
    page = get_encoding(raw, True, 'utf8')[1]
    write_log(translate('Mp3tag', 'Parsing search page.'))
    set_status(translate('Mp3tag', 'Parsing search page...'))
    matches = parse_search_page(self.indexformat, page,
                                self.search_source, search_url)
    return [(match, []) for match in matches]
def retrieve_album(url, coverurl=None, id_field=ALBUM_ID):
    # Fetch and parse an AllMusic album page for *url*, optionally
    # retrieving the cover image.
    # NOTE(review): this block appears truncated -- no return statement is
    # visible; `info`, `tracks` and `cover` are presumably returned further
    # down in the original file. Python 2 syntax (`except X, e`).
    write_log('Opening Album Page - %s' % url)
    album_page, code = urlopen(url, False, True)
    # Old AMG URLs redirect to the front page, which contains this marker.
    if album_page.find("featured new releases") >= 0:
        raise OldURLError("Old AMG URL used.")
    album_page = get_encoding(album_page, True, 'utf8')[1]
    info, tracks = parse_albumpage(album_page)
    info['#albumurl'] = url
    info['amg_url'] = url
    if 'album' in info:
        # Extra display info shown to the user: label plus the source URL.
        info['#extrainfo'] = [
            info['album'] + u' at AllMusic.com', info['#albumurl']
        ]
    if coverurl:
        try:
            write_log('Retrieving Cover - %s' % info['#cover-url'])
            cover = retrieve_cover(info['#cover-url'])
        except KeyError:
            # The parsed page had no '#cover-url' entry.
            write_log('No cover found.')
            cover = None
        except urllib2.URLError, e:
            # Network failure fetching the cover is non-fatal.
            write_log(u'Error: While retrieving cover %s - %s' %
                      (info['#cover-url'], unicode(e)))
            cover = None
def retrieve_album(url, coverurl=None, id_field=ALBUM_ID):
    # Fetch and parse an AllMusic album page for *url*, optionally
    # retrieving the cover image. (Duplicate definition of the same
    # function appearing earlier in this file.)
    # NOTE(review): this block appears truncated -- no return statement is
    # visible; `info`, `tracks` and `cover` are presumably returned further
    # down in the original file. Python 2 syntax (`except X, e`).
    write_log('Opening Album Page - %s' % url)
    album_page, code = urlopen(url, False, True)
    # Old AMG URLs redirect to the front page, which contains this marker.
    if album_page.find("featured new releases") >= 0:
        raise OldURLError("Old AMG URL used.")
    album_page = get_encoding(album_page, True, 'utf8')[1]
    info, tracks = parse_albumpage(album_page)
    info['#albumurl'] = url
    info['amg_url'] = url
    if 'album' in info:
        # Extra display info shown to the user: label plus the source URL.
        info['#extrainfo'] = [
            info['album'] + u' at AllMusic.com', info['#albumurl']
        ]
    if coverurl:
        try:
            write_log('Retrieving Cover - %s' % info['#cover-url'])
            cover = retrieve_cover(info['#cover-url'])
        except KeyError:
            # The parsed page had no '#cover-url' entry.
            write_log('No cover found.')
            cover = None
        except urllib2.URLError, e:
            # Network failure fetching the cover is non-fatal.
            write_log(u'Error: While retrieving cover %s - %s' %
                      (info['#cover-url'], unicode(e)))
            cover = None
def search(self, artist, files=None):
    """Retrieve and parse the search page for *artist*.

    files -- optional list of audio files; when self.searchby is set,
        the keywords are formatted from the first file's tags instead
        of *artist*.

    Return a list of (album-info, tracks) candidates.
    """
    if files is not None and self.searchby:
        keywords = format_value(files[0], self.searchby)
    else:
        keywords = artist
    # BUG FIX: pattern was the non-raw string '\s+', an invalid escape
    # sequence (DeprecationWarning, and an error in future Pythons);
    # use a raw string. Matching behavior is unchanged.
    keywords = re.sub(r'\s+', self._separator, keywords)
    if self.search_source is None:
        # No search page configured -- fetch the album page directly.
        album = self.retrieve(keywords)
        return [album] if album else []
    url = self._search_base.replace(u'%s', keywords)
    write_log(translate('Mp3tag', u'Retrieving search page: %s') % url)
    set_status(translate('Mp3tag', u'Retrieving search page...'))
    if self.html is None:
        page = get_encoding(urlopen(url), True, 'utf8')[1]
    else:
        # Pre-supplied HTML (e.g. for testing) skips the network fetch.
        page = get_encoding(self.html, True, 'utf8')[1]
    write_log(translate('Mp3tag', u'Parsing search page.'))
    set_status(translate('Mp3tag', u'Parsing search page...'))
    infos = parse_search_page(self.indexformat, page, self.search_source, url)
    return [(info, []) for info in infos]
def parse_searchpage(page, artist=None, album=None, id_field=ALBUM_ID):
    """Parses a search page and gets relevant info.

    Arguments:
    page -- html string with search page's html.
    artist -- artist to check for in results. If found only results
        with that artist are returned.
    album -- album to check for in results. If found only results
        with the album are returned.
    id_field -- key to use for the album id found.

    Return a tuple with the first element being == True if the list
    was truncated with only matching artist/albums.
    """
    page = get_encoding(page, True, 'utf8')[1]
    soup = parse_html.SoupWrapper(parse_html.parse(page))
    result_table = soup.find('ul', {'class': 'search-results'})
    try:
        results = result_table.find_all('div', {'class': 'info'})
    except AttributeError:
        # No result list on the page.
        # NOTE(review): shape is inconsistent with the (bool, list) tuple
        # returned below; kept as-is so existing callers are unaffected.
        return []

    albums = [parse_search_element(result) for result in results]

    if artist and album:
        d = {'artist': artist, 'album': album}
        top = [a for a in albums if equal(d, a, True)]
    elif album:
        d = {'album': album}
        top = [a for a in albums if equal(d, a, True, ['album'])]
        if not top:
            # Fall back to a looser (non-strict) album match.
            top = [a for a in albums if equal(d, a, False, ['album'])]
    elif artist:
        d = {'artist': artist}
        top = [a for a in albums if equal(d, a, True, ['artist'])]
        # BUG FIX: was `if not ret:` -- `ret` is undefined (NameError).
        if not top:
            top = [a for a in albums if equal(d, a, False, ['artist'])]
    else:
        top = []

    # BUG FIX: previously always returned (False, albums), silently
    # discarding the filtered matches computed above; honor the
    # documented contract and return the truncated list when it matched.
    if top:
        return True, top
    return False, albums
def parse_searchpage(page, artist=None, album=None, id_field=ALBUM_ID):
    """Parses a search page and gets relevant info.

    (Duplicate definition of the same function appearing earlier
    in this file.)

    Arguments:
    page -- html string with search page's html.
    artist -- artist to check for in results. If found only results
        with that artist are returned.
    album -- album to check for in results. If found only results
        with the album are returned.
    id_field -- key to use for the album id found.

    Return a tuple with the first element being == True if the list
    was truncated with only matching artist/albums.
    """
    page = get_encoding(page, True, 'utf8')[1]
    soup = parse_html.SoupWrapper(parse_html.parse(page))
    result_table = soup.find('ul', {'class': 'search-results'})
    try:
        results = result_table.find_all('div', {'class': 'info'})
    except AttributeError:
        # No result list on the page.
        # NOTE(review): shape is inconsistent with the (bool, list) tuple
        # returned below; kept as-is so existing callers are unaffected.
        return []

    albums = [parse_search_element(result) for result in results]

    if artist and album:
        d = {'artist': artist, 'album': album}
        top = [a for a in albums if equal(d, a, True)]
    elif album:
        d = {'album': album}
        top = [a for a in albums if equal(d, a, True, ['album'])]
        if not top:
            # Fall back to a looser (non-strict) album match.
            top = [a for a in albums if equal(d, a, False, ['album'])]
    elif artist:
        d = {'artist': artist}
        top = [a for a in albums if equal(d, a, True, ['artist'])]
        # BUG FIX: was `if not ret:` -- `ret` is undefined (NameError).
        if not top:
            top = [a for a in albums if equal(d, a, False, ['artist'])]
    else:
        top = []

    # BUG FIX: previously always returned (False, albums), silently
    # discarding the filtered matches computed above; honor the
    # documented contract and return the truncated list when it matched.
    if top:
        return True, top
    return False, albums
def retrieve(self, info):
    """Retrieve and parse an album page.

    info -- either a keyword string, or a dict (from a prior search)
        holding a '#url' key.

    Return (info_dict, tracks); tracks is None when none were parsed.
    """
    if isinstance(info, str):
        text = info.replace(' ', self._separator)
        info = {}
    else:
        # Deep-copy so the caller's dict is never mutated.
        info = deepcopy(info)
        text = info['#url']
    try:
        url = self.album_url % text
    except TypeError:
        # album_url has no format placeholder; just append the text.
        url = self.album_url + text
    info['#url'] = url
    try:
        write_log(translate('Mp3tag', 'Retrieving album page: %s') % url)
        set_status(translate('Mp3tag', 'Retrieving album page...'))
        page = get_encoding(urlopen(url), True, 'utf8')[1]
    except Exception:
        # BUG FIX: was a bare `except:`, which also swallowed
        # SystemExit/KeyboardInterrupt. Keep the best-effort fallback
        # to an empty page for ordinary failures.
        page = ''
    write_log(translate('Mp3tag', 'Parsing album page.'))
    set_status(translate('Mp3tag', 'Parsing album page...'))
    new_info, tracks = parse_album_page(page, self.album_source, url)
    # Merge only non-empty parsed values over the existing info.
    info.update(dict((k, v) for k, v in new_info.items() if v))
    if self._get_cover and COVER in info:
        # BUG FIX: read from the merged `info` -- the membership test is
        # on `info`, and `new_info[COVER]` could KeyError when the cover
        # value came from the caller-supplied dict.
        cover_url = info[COVER]
        if isinstance(cover_url, str):
            info.update(retrieve_cover(cover_url))
        else:
            # BUG FIX: was info.update(list(map(retrieve_cover, ...))) --
            # dict.update() on a list of cover dicts iterates their keys
            # and raises ValueError; merge each retrieved cover dict.
            for single_url in cover_url:
                info.update(retrieve_cover(single_url))
    if not tracks:
        tracks = None
    return info, tracks
def retrieve(self, info):
    """Retrieve and parse an album page (Python 2 variant).

    info -- either a keyword string, or a dict (from a prior search)
        holding a '#url' key.

    Return (info_dict, tracks); tracks is None when none were parsed.
    """
    if isinstance(info, basestring):
        text = info.replace(u' ', self._separator)
        info = {}
    else:
        # Deep-copy so the caller's dict is never mutated.
        info = deepcopy(info)
        text = info['#url']
    try:
        url = self.album_url % text
    except TypeError:
        # album_url has no format placeholder; just append the text.
        url = self.album_url + text
    info['#url'] = url
    try:
        write_log(translate('Mp3tag', u'Retrieving album page: %s') % url)
        set_status(translate('Mp3tag', u'Retrieving album page...'))
        page = get_encoding(urlopen(url), True, 'utf8')[1]
    except Exception:
        # BUG FIX: was a bare `except:`, which also swallowed
        # SystemExit/KeyboardInterrupt. Keep the best-effort fallback
        # to an empty page for ordinary failures.
        page = u''
    write_log(translate('Mp3tag', u'Parsing album page.'))
    set_status(translate('Mp3tag', u'Parsing album page...'))
    new_info, tracks = parse_album_page(page, self.album_source, url)
    # Merge only non-empty parsed values over the existing info.
    info.update(dict((k, v) for k, v in new_info.iteritems() if v))
    if self._get_cover and COVER in info:
        # BUG FIX: read from the merged `info` -- the membership test is
        # on `info`, and `new_info[COVER]` could KeyError when the cover
        # value came from the caller-supplied dict.
        cover_url = info[COVER]
        if isinstance(cover_url, basestring):
            info.update(retrieve_cover(cover_url))
        else:
            # BUG FIX: was info.update(map(retrieve_cover, ...)) --
            # dict.update() on a list of cover dicts iterates their keys
            # and raises ValueError; merge each retrieved cover dict.
            for single_url in cover_url:
                info.update(retrieve_cover(single_url))
    if not tracks:
        tracks = None
    return info, tracks
        # --- tail of an album-retrieval method (its `def` lies outside this
        # chunk); fetches the album page for `albuminfo` and merges results.
        set_status('Retrieving album.')
        write_log('Retrieving album.')
        write_log('Album URL - %s' % albuminfo['#albumurl'])
        url = albuminfo['#albumurl']
        try:
            # NOTE(review): both branches are identical -- the _useid
            # preference currently has no effect here; confirm intent.
            if self._useid:
                info, tracks, cover = retrieve_album(url, self._getcover)
            else:
                info, tracks, cover = retrieve_album(url, self._getcover)
        except urllib.error.URLError as e:
            write_log('Error: While retrieving album URL %s - %s' %
                      (url, str(e)))
            raise RetrievalError(str(e))
        if cover:
            # Merge retrieved cover-art fields into the parsed info.
            info.update(cover)
        # Copy so the caller's dict is left untouched.
        albuminfo = albuminfo.copy()
        albuminfo.update(info)
        return albuminfo, tracks

    def applyPrefs(self, args):
        # Apply user preferences: args[0] = fetch cover art,
        # args[1] = use album id.
        self._getcover = args[0]
        self._useid = args[1]


# Plugin entry point expected by the host application.
info = AllMusic

if __name__ == '__main__':
    # Manual test: parse a saved album page given as argv[1].
    f = get_encoding(open(sys.argv[1], 'r').read(), True)[1]
    x = parse_albumpage(f)
    print(x)
        # --- fragment: exception handler from an album-retrieval method; the
        # matching `try:` and the method's `def` lie outside this chunk.
        # NOTE(review): statement nesting below is inferred from a collapsed
        # source line -- confirm against the original file. Python 2 syntax.
        except KeyError:
            set_status('Retrieving album.')
            write_log('Retrieving album.')
            write_log('Album URL - %s' % albuminfo['#albumurl'])
            url = albuminfo['#albumurl']
            try:
                # NOTE(review): both branches are identical -- the _useid
                # preference currently has no effect here; confirm intent.
                if self._useid:
                    info, tracks, cover = retrieve_album(url, self._getcover)
                else:
                    info, tracks, cover = retrieve_album(url, self._getcover)
            except urllib2.URLError, e:
                write_log(u'Error: While retrieving album URL %s - %s' %
                          (url, unicode(e)))
                raise RetrievalError(unicode(e))
            if cover:
                # Merge retrieved cover-art fields into the parsed info.
                info.update(cover)
            # Copy so the caller's dict is left untouched.
            albuminfo = albuminfo.copy()
            albuminfo.update(info)
            return albuminfo, tracks

    def applyPrefs(self, args):
        # Apply user preferences: args[0] = fetch cover art,
        # args[1] = use album id.
        self._getcover = args[0]
        self._useid = args[1]


# Plugin entry point expected by the host application.
info = AllMusic

if __name__ == '__main__':
    # Manual test (Python 2): parse a saved album page given as argv[1].
    f = get_encoding(open(sys.argv[1], 'r').read(), True)[1]
    x = parse_albumpage(f)
    print x