def nfo_read(ifiles): try: mbid_xml_map = { xml_item["artist"]["mbid"].lower(): xml_item["artist"]["name"] for ifile in ifiles for xml_item in xml_decode.xml_decode(ifile)["musicdb"] } except Exception, e: print e mbid_xml_map = {}
def scrape_artists(artsr, outfile, translate): start = time.clock() arts = collections.defaultdict(set) for k,v in artsr.iteritems(): for vv in v: arts[vv].add(k) try: mbid_xml_map = {mbid : item for item in xml_decode.xml_decode(outfile)['musicdb'] for mbid in item['artist']['mbid'].upper().split('/')} except: mbid_xml_map = {} print "%%%%", mbid_xml_map ############################################ #Collect artist info collected = {} for idx, (mbid, artist) in enumerate(sorted(arts.iteritems())): print "\tArtist", idx, len(arts), repr(artist) xml_item = mbid_xml_map.get(mbid.upper(), None) if not xml_item: try: rec = lastfm_artist(artist,[],mbid) if "artist" in rec: rec["artist"]["fanart"] = {} rec["artist"]["fanart"]["thumb"] = backdrops_artist(artist,[],mbid)["artist"].get("fanart", []) collected[mbid] = rec except Exception, e: print e import traceback print traceback.format_exc() collected['mbid'] = {} else: #change single items to a list for attrs in (('genre',), ('thumb',), ('fanart', 'thumb')): a = xml_item['artist'] save = xml_item['artist'] for attr in attrs: if a: save = a a = a.get(attr, []) if isinstance(a, basestring): save[attr] = [a] collected[mbid] = xml_item
def scrape_albums(albums, ofile, translate): start = time.clock() ############################################ #xml info try: mbid_xml_map = {item['album']['mbid'].upper() : item for item in xml_decode.xml_decode(ofile)['musicdb']} except: mbid_xml_map = {} with codecs.open(ofile, "w", encoding='utf8') as fo: fo.write('<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>\n<musicdb>\n') for idx, ((album,mbid), artist) in enumerate(sorted(albums.iteritems())): print idx, len(albums), clean(album), mbid if mbid: for scraper in (mb_albums2, ): try: xml_item = mbid_xml_map.get(mbid.upper(), None) if not xml_item or xml_item['album']['artist'].strip() != escape(artist).strip(): tmp = scraper(album, mbid) else: tmp = xml_item if tmp and 'track' in tmp['album']: #overwrite to ensure that the artist names stay the same!! tmp['album']['artist'] = escape(artist) fo.write(translate(unquote(encode(tmp)))) fo.flush() #.encode("utf-8") break else: print "\tneed brainz!!", album except Exception, e: print "^^^^", e print traceback.format_exc() fo.flush() fo.write("\n</musicdb>\n")
def update(fname_in, typ, dirt): dirn, filen = os.path.split(fname_in) prefn, sufn = os.path.splitext(filen) fname_dir = os.path.join(dirn, dirt) fname_out = os.path.join(dirn, "{}_cached{}".format(prefn, sufn)) print "using\t", fname_in, fname_dir, fname_out rec = collections.defaultdict(list) for item in xml_decode.xml_decode(fname_in)['musicdb']: thumb = item[typ].get('thumb', []) if isinstance(thumb, basestring): thumb = [thumb] item[typ]['thumb'] = thumb rec[thumb[0] if thumb else None].append(item) def new_name(fname_dir, mbid): stub = mbid.replace('/','-') if len(stub) > 100: stub = stub[:100] ret= r'%s\%s.jpg' % (fname_dir, stub) return ret with codecs.open(fname_out, "w", encoding='utf8') as foo: foo.write('<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>\n<musicdb>\n') for idx, (filename, items) in enumerate(sorted(rec.iteritems(), key=lambda item: item[1][0][typ]['mbid'])): print print "\t{}/{}: ".format(idx, len(rec)) if filename: if not all( os.path.exists(ident(new_name(fname_dir, item[typ]['mbid']))) for item in items ): try: contents = scrapers.geturlbin(filename) #guessed wrong image due to amazon if len(contents) < 100: filename =scrapers.brainz_cover_art(items[0][typ]['mbid']) print "!!\tretrying with:", filename contents = scrapers.geturlbin(filename) except Exception,e: contents = False print print "^^^", e print filename,items import traceback print traceback.format_exc() else: contents = True else: contents = False for item in items: if contents: fname_new = new_name(fname_dir, item[typ]['mbid']) print "\t", item[typ]['mbid'], filename, fname_new if contents != True: with open(fname_new, 'wb') as fo: fo.write(contents) item[typ]['thumb'] = [fname_new] + item[typ]['thumb'] foo.write(tags_config.translate(scrapers.unquote(scrapers.encode(item)))) foo.write("\n") foo.flush() foo.write("\n</musicdb>\n")