def __sense(url, releases):
    """Retrieves releases for specified band id.

    Injected into Bandsensor.

    The release listing is requested in pages of 100 entries. Every page
    is passed to __getalbums together with the albums collected so far,
    and paging continues until the reported number of releases has been
    covered by the requested offsets.

    Args:
        url: ID of the current band (it's mbid format here)
        releases: types of releases to search for

    Returns:
        tuple -- (<url>, <list-of-tuples> -- (<album>, <year>))

    Note: It is meant for internal usage only!
    """
    page_size = 100
    # The type filter is identical for every page, so build it only once.
    types = u'|'.join(releases).lower()
    result = dict()
    partial = dict()
    offset = 0
    remaining = page_size  # any positive value, just to enter the loop
    while remaining > 0:
        soup = reqread(
            u'http://www.musicbrainz.org/ws/2/release?artist=' + url +
            u'&type=' + types +
            u'&offset=' + unicode(offset) +
            u'&limit=' + unicode(page_size)
        )
        # NOTE(review): n is assumed to be the total number of matching
        # releases reported by the service, not the size of this page --
        # confirm against __getalbums.
        (partial, n) = __getalbums(soup, partial)
        result.update(partial)
        # Work out what is left from the running offset. Subtracting the
        # previous remainder as well made the loop end after the second
        # page no matter how many releases were reported.
        offset += page_size
        remaining = n - offset
    return (url, list(result.iteritems()))
def work(artist, element, urls, releases):
    """Fetches new or updated release info for the given artist.

    If a u'metalArchives' entry is already present in urls, the releases
    are fetched for that entry directly. Otherwise the band search of
    metal-archives.com is queried by artist name and the raw response is
    handed over to __parse2.

    Args:
        artist: artist to check against
        element: db element containing existing info
            (it is db[<artist_name>])
        urls: urls previously retrieved for given artist
        releases: types of releases to check for

    Returns:
        For a stored entry, a dict with the keys u'choice', u'result',
        u'errors' (an empty set) and u'artist'. Otherwise whatever
        __parse2 returns.

    Note: Should be threaded in real application.
    """
    if not urls or u'metalArchives' not in urls:
        # Nothing stored yet: search by name. '&' is spelled out, '/' is
        # dropped and spaces are sent as '+' in the query string.
        cleaned = artist.replace(u'&', u'and').replace(u'/', u'')
        quoted = urllib2.quote(cleaned.encode(u'utf-8'))
        query = quoted.replace(u'%20', u'+')
        response = reqread(
            u'http://www.metal-archives.com/search/ajax-band-search/?field=name&query=' +
            query +
            '&sEcho=1&iColumns=3&sColumns=&iDisplayStart=0&iDisplayLength=100&sNames=%2C%2C'
        )
        return __parse2(response, artist, element, releases)

    band_id, albums = __sense(urls[u'metalArchives'], releases)
    return {
        u'choice': band_id,
        u'result': albums,
        u'errors': set(),
        u'artist': artist
    }
def init():
    """Fills the glob mapping from the Prog Archives band index.

    Downloads the band listing page, re-encodes it from latin-1 to utf-8
    and runs an XPath query over it whose predicate is a custom extension
    function. For every candidate link that function stores the text of
    the link's <strong> child in glob (defined outside this function),
    keyed by the link's href with its first 14 characters removed.
    """
    def _remember(context, names, hrefs):
        name = names[0].text
        # Dropping 14 characters presumably strips a leading
        # "artist.asp?id=" so that only the band id is left -- confirm
        # against the page markup.
        glob[str(hrefs[0])[14:]] = name
        # Never select the node: the query is run for its side effect.
        return False

    page = reqread(u'http://www.progarchives.com/bands-alpha.asp?letter=*')
    page = page.decode(u'latin-1').encode(u'utf-8')
    tree = etree.HTML(page)

    # Expose the callback to XPath as pa:test(...).
    functions = etree.FunctionNamespace(u'http://fake.gayeogi/functions')
    functions.prefix = u'pa'
    functions[u'test'] = _remember

    tree.xpath(u'//td/a[pa:test(strong, @href)]')
def __sense(url, releases):
    """Retrieves releases for specified band id.

    Injected into Bandsensor.

    Downloads the band's complete discography tab from
    metal-archives.com, decodes it as utf-8 and lets __getalbums pick out
    the releases.

    Args:
        url: ID of the current band
        releases: types of releases to search for

    Returns:
        tuple -- (<url>, <list-of-tuples> -- (<album>, <year>))

    Note: It is meant for internal usage only!
    """
    address = (
        u'http://www.metal-archives.com/band/discography/id/' +
        url +
        u'/tab/all'
    )
    page = reqread(address).decode(u'utf-8')
    albums = __getalbums(page, releases)
    return (url, albums)
def __sense(url, releases):
    """Retrieves releases for specified band id.

    Injected into Bandsensor.

    Downloads the artist page from progarchives.com and evaluates one
    XPath query over it. Two extension functions drive the query:
    pa:enabled keeps only the <h3> headings that start with
    "<artist name> <release label>" for one of the requested release
    types, and pa:test records an (album, year) pair for every table
    cell it is called on below such a heading.

    Args:
        url: ID of the current band (must already be a key of glob)
        releases: types of releases to search for (keys of di)

    Returns:
        tuple -- (<url>, <list-of-tuples> -- (<album>, <year>))

    Note: It is meant for internal usage only!
    """
    page = reqread(u'http://www.progarchives.com/artist.asp?id=' + url)
    page = page.decode(u'latin-1').encode(u'utf-8')
    band_name = glob[url]
    found = []

    def _wanted_heading(context, nodes):
        # Case-insensitive prefix match of the heading text against every
        # requested release type; stops at the first hit.
        return any(
            nodes[0].text.lower().startswith(
                band_name.lower() + u' ' + di[kind].lower()
            )
            for kind in releases
        )

    def _collect(context, titles, years):
        found.append((titles[0].text.strip(), years[0].text.strip()))
        # Never select the node: only the side effect above matters.
        return False

    tree = etree.HTML(page)

    # Expose both callbacks to XPath under the pa: prefix.
    functions = etree.FunctionNamespace(u'http://fake.gayeogi/functions')
    functions.prefix = u'pa'
    functions[u'test'] = _collect
    functions[u'enabled'] = _wanted_heading

    tree.xpath(
        u'//h3[pa:enabled(.)]/following-sibling::*[1]//td[pa:test(a[2]/strong, span[3])]'
    )
    return (url, found)
def work(artist, element, urls, releases):
    """Fetches new or updated release info for the given artist.

    If a u'musicbrainz' entry is already present in urls, the releases
    are fetched for that entry directly. Otherwise the MusicBrainz artist
    search is queried by name, the response is decoded with JParse and a
    Bandsensor (driven by __sense) picks the matching candidate.

    Args:
        artist: artist to check against
        element: db element containing existing info
            (it is db[<artist_name>])
        urls: urls previously retrieved for given artist
        releases: types of releases to check for

    Returns:
        A dict with the keys u'choice', u'result' and u'errors'. Only the
        dict returned for an already stored entry also carries u'artist'.

    Raises:
        NoBandError: the search returned nothing, or the sensor run
            produced no data.

    Note: Should be threaded in real application.
    """
    if not urls or u'musicbrainz' not in urls:
        # Nothing stored yet: search by name. '/' is dropped and spaces
        # are sent as '+' in the query string.
        encoded = artist.replace(u'/', u'').encode(u'utf-8')
        query = urllib2.quote(encoded).replace(u'%20', u'+')
        candidates = reqread(
            u'http://search.musicbrainz.org/ws/2/artist/?query=artist:' +
            query +
            u'*&fmt=json'
        )
        if not candidates:
            raise NoBandError()

        sensor = Bandsensor(
            __sense, JParse(artist).decode(candidates), element, releases
        )
        found = sensor.run()
        if not found:
            raise NoBandError()
        return {
            u'choice': found[0],
            u'result': found[1],
            u'errors': sensor.errors
        }

    mbid, albums = __sense(urls[u'musicbrainz'], releases)
    return {
        u'choice': mbid,
        u'result': albums,
        u'errors': set(),
        u'artist': artist
    }