def update_relations(self):
    """
    relation mappings (urls)

    Creates a `Relation` on `self.obj` for every entry of
    `self._data['relations']` that carries an `url`, whose `type` is listed
    in `MB_RELATION_USE_SERVICES` and whose url (compared via `strip_http`)
    is not attached to the object yet. Every stored url is appended to
    `self._changes['relations']`.
    """
    # tolerate missing provider data instead of failing on `None.get`
    if not self._data:
        return

    relations = self._data.get('relations')
    if not relations:
        return

    # set of normalised urls; extended below so that an url occurring twice
    # in the same payload is stored only once
    known_urls = {strip_http(r.url) for r in self.obj.relations.all()}

    for relation in relations:
        if 'url' not in relation:
            continue

        # check the relation type first - no need to resolve a service for
        # relations that are skipped anyway
        if relation['type'] not in MB_RELATION_USE_SERVICES:
            continue

        _url = relation['url']['resource']
        _key = strip_http(_url)
        if _key in known_urls:
            continue

        if relation['type'] == 'official homepage':
            _service = 'official'
        else:
            _service = get_service_by_url(_url)

        Relation(content_object=self.obj, url=_url, service=_service).save()

        known_urls.add(_key)
        self._changes.setdefault('relations', []).append(_url)
def update_relations(self):
    """
    relation mappings (urls)

    Creates a `Relation` on `self.obj` for every url in `self._data['urls']`
    whose service (as resolved by `get_service_by_url`) is listed in
    `DISCOGS_RELATION_USE_SERVICES` and which is not attached to the object
    yet. Urls are compared - and stored - without their query parameters;
    the url as received is appended to `self._changes['relations']`.
    """
    if not self._data:
        return

    relations = self._data.get('urls')
    if not relations:
        return

    # set of normalised urls; extended below so that an url occurring twice
    # in the same payload is stored only once
    known_urls = {
        strip_http(strip_query_params(r.url))
        for r in self.obj.relations.all()
    }

    for url in relations:
        _service = get_service_by_url(url)
        if _service not in DISCOGS_RELATION_USE_SERVICES:
            continue

        _clean_url = strip_query_params(url)
        _key = strip_http(_clean_url)
        if _key in known_urls:
            continue

        Relation(
            content_object=self.obj,
            url=_clean_url,
            service=_service,
        ).save()

        known_urls.add(_key)
        self._changes.setdefault('relations', []).append(url)
def update_relations(self):
    """
    relation mappings (urls)

    Creates a `Relation` on `self.obj` for every entry of
    `self._data['relations']` that carries an `url`, whose `type` is listed
    in `MB_RELATION_USE_SERVICES` and whose url (compared via `strip_http`)
    is not attached to the object yet. Every stored url is appended to
    `self._changes['relations']`.
    """
    # tolerate missing provider data instead of failing on `None.get`
    if not self._data:
        return

    relations = self._data.get('relations')
    if not relations:
        return

    # set of normalised urls; extended below so that an url occurring twice
    # in the same payload is stored only once
    known_urls = {strip_http(r.url) for r in self.obj.relations.all()}

    for relation in relations:
        if 'url' not in relation:
            continue

        # check the relation type first - no need to resolve a service for
        # relations that are skipped anyway
        if relation['type'] not in MB_RELATION_USE_SERVICES:
            continue

        _url = relation['url']['resource']
        _key = strip_http(_url)
        if _key in known_urls:
            continue

        if relation['type'] == 'official homepage':
            _service = 'official'
        else:
            _service = get_service_by_url(_url)

        Relation(content_object=self.obj, url=_url, service=_service).save()

        known_urls.add(_key)
        self._changes.setdefault('relations', []).append(_url)
def run(self, obj):
    """
    Map the import tags of `obj` (`obj.import_tag`) to a `Media` instance
    and the `Release` it belongs to.

    The release is looked up by its musicbrainz id first; if that yields
    nothing it is fetched or created by name. A release obtained by name
    gets a musicbrainz `Relation` and is completed with remote metadata
    when a `mb_release_id` tag is available.
    """
    log = logging.getLogger('util.importer.Importer.run')

    # map
    it = obj.import_tag
    log.debug('import_tag: %s' % (it,))

    if 'mb_track_id' in it:
        log.info('mb_track_id: %s' % (it['mb_track_id']))

    m = Media(name=it['name'])

    # get/create release - lookup by mb_id first
    r = None
    if 'mb_release_id' in it:
        lrs = lookup.release_by_mb_id(it['mb_release_id'])
        if lrs.count() > 0:
            r = lrs[0]

    if not r:
        r, _created = Release.objects.get_or_create(name=it['release'])

        # assign mb_relation
        if 'mb_release_id' in it:
            url = 'http://musicbrainz.org/release/%s' % it['mb_release_id']
            log.debug('musicbrainz_url: %s' % url)
            Relation(content_object=r, url=url).save()

            # complete metadata - kept inside the guard, as
            # `complete_release_meta` reads it['mb_release_id']
            self.complete_release_meta(r, it)

    if r:
        m.release = r
def complete_release_meta(self, r, it):
    """
    Complete release `r` with data from musicbrainz.

    Fetches the release identified by `it['mb_release_id']` and, for every
    url-relation of type 'discogs', stores a `Relation` pointing to the
    discogs url and tries to use the discogs image as `r.main_image`.

    NOTE(review): `r` is not saved here - presumably the caller persists
    the assigned image; confirm.
    """
    includes = [
        "artists", "labels", "recordings", "release-groups", "media",
        "artist-credits", "discids", "puids", "isrcs", "artist-rels",
        "label-rels", "recording-rels", "release-rels", "release-group-rels",
        "url-rels", "work-rels", "recording-level-rels", "work-level-rels",
    ]

    mb_release = musicbrainzngs.get_release_by_id(
        id=it['mb_release_id'], includes=includes)
    mbr = mb_release['release']

    print('***************************************')
    if 'status' in mbr:
        print(mbr['status'])
    if 'title' in mbr:
        print(mbr['title'])

    if 'url-relation-list' not in mbr:
        return

    for url_rel in mbr['url-relation-list']:
        print(url_rel)

        if url_rel['type'] != 'discogs':
            continue

        # keep the target url in its own name: previously the loop variable
        # was overwritten by the Relation instance, which made the image
        # lookup below fail (silently) every time
        target = url_rel['target']
        print('DISCOGS: %s' % target)

        try:
            Relation(content_object=r, url=target).save()
        except Exception as e:
            print('RELATION EXCEPTION')
            print(e)

        # best effort: a missing image must not abort the import
        try:
            discogs_image = discogs_image_by_url(target)
            r.main_image = filer_extra.url_to_file(discogs_image, r.folder)
        except Exception as e:
            print('unable to assign discogs image: %s' % e)
def complete_release_meta(self, r, it):
    """
    Complete release `r` with data from musicbrainz.

    Fetches the release identified by `it['mb_release_id']` and, for every
    url-relation of type 'discogs', stores a `Relation` pointing to the
    discogs url and tries to use the discogs image as `r.main_image`.

    NOTE(review): `r` is not saved here - presumably the caller persists
    the assigned image; confirm.
    """
    includes = [
        "artists", "labels", "recordings", "release-groups", "media",
        "artist-credits", "discids", "puids", "isrcs", "artist-rels",
        "label-rels", "recording-rels", "release-rels", "release-group-rels",
        "url-rels", "work-rels", "recording-level-rels", "work-level-rels",
    ]

    mb_release = musicbrainzngs.get_release_by_id(
        id=it['mb_release_id'], includes=includes)
    mbr = mb_release['release']

    print('***************************************')
    if 'status' in mbr:
        print(mbr['status'])
    if 'title' in mbr:
        print(mbr['title'])

    if 'url-relation-list' not in mbr:
        return

    for url_rel in mbr['url-relation-list']:
        print(url_rel)

        if url_rel['type'] != 'discogs':
            continue

        # keep the target url in its own name: previously the loop variable
        # was overwritten by the Relation instance, which made the image
        # lookup below fail (silently) every time
        target = url_rel['target']
        print('DISCOGS: %s' % target)

        try:
            Relation(content_object=r, url=target).save()
        except Exception as e:
            print('RELATION EXCEPTION')
            print(e)

        # best effort: a missing image must not abort the import
        try:
            discogs_image = discogs_image_by_url(target)
            r.main_image = filer_extra.url_to_file(discogs_image, r.folder)
        except Exception as e:
            print('unable to assign discogs image: %s' % e)
def provider_update(request, *args, **kwargs):
    """
    Attach a provider url to a library item and report the result.

    Reads `item_type` ('release', 'artist', 'label' or 'media'), `item_id`
    and `uri` from `kwargs`. If the item exists and an uri is given, a
    `Relation` is saved for it.

    Returns a JSON string: an object with the `service` and `url` of the
    saved relation, or `{}` if nothing was saved (unknown item type,
    missing uri, lookup or save error - errors are logged as warnings).
    """
    # function-scope import: keeps this block self-contained
    import json

    log = logging.getLogger('alibrary.ajax.api_lookup')
    log.debug('kwargs: %s' % (kwargs,))

    item_type = kwargs.get('item_type', None)
    item_id = kwargs.get('item_id', None)
    uri = kwargs.get('uri', None)

    log.debug('uri: %s' % (uri))

    item = None
    data = {}

    try:
        if item_type == 'release':
            item = Release.objects.get(pk=item_id)
        elif item_type == 'artist':
            item = Artist.objects.get(pk=item_id)
        elif item_type == 'label':
            item = Label.objects.get(pk=item_id)
        elif item_type == 'media':
            item = Media.objects.get(pk=item_id)

        if item and uri:
            rel = Relation(content_object=item, url=uri)
            rel.save()
            data = {
                'service': '%s' % rel.service,
                'url': '%s' % rel.url,
            }

    except Exception as e:
        # request boundary: report the failure, answer with an empty result
        log.warning('%s' % e)

    # the collected data was previously built but never handed back
    return json.dumps(data)
def provider_update(request, *args, **kwargs):
    """
    Attach a provider url to a library item and report the result.

    Reads `item_type` ("release", "artist", "label" or "media"), `item_id`
    and `uri` from `kwargs`. If the item exists and an uri is given, a
    `Relation` is saved for it.

    Returns a JSON string: an object with the `service` and `url` of the
    saved relation, or `{}` if nothing was saved (unknown item type,
    missing uri, lookup or save error - errors are logged as warnings).
    """
    # function-scope import: keeps this block self-contained
    import json

    log = logging.getLogger("alibrary.ajax.api_lookup")
    log.debug("kwargs: %s" % (kwargs,))

    item_type = kwargs.get("item_type", None)
    item_id = kwargs.get("item_id", None)
    uri = kwargs.get("uri", None)

    log.debug("uri: %s" % (uri))

    item = None
    data = {}

    try:
        if item_type == "release":
            item = Release.objects.get(pk=item_id)
        elif item_type == "artist":
            item = Artist.objects.get(pk=item_id)
        elif item_type == "label":
            item = Label.objects.get(pk=item_id)
        elif item_type == "media":
            item = Media.objects.get(pk=item_id)

        if item and uri:
            rel = Relation(content_object=item, url=uri)
            rel.save()
            data = {"service": "%s" % rel.service, "url": "%s" % rel.url}

    except Exception as e:
        # request boundary: report the failure, answer with an empty result
        log.warning("%s" % e)

    # the collected data was previously built but never handed back
    return json.dumps(data)
def provider_update(request, *args, **kwargs):
    """
    Resolve the service for a provider url of a library item.

    `kwargs` supply `item_type` ('release', 'artist', 'label' or 'media'),
    `item_id` and `uri`. For an existing item and a given uri an unsaved
    `Relation` is built and its `service` / `url` are returned as a JSON
    object; in every other case (including errors, which are logged) the
    result is the JSON of an empty dict.
    """
    uri = kwargs.get('uri', None)
    item_id = kwargs.get('item_id', None)
    item_type = kwargs.get('item_type', None)

    log.debug('uri: %s' % (uri))

    payload = {}

    try:
        target = None
        if item_type == 'release':
            target = Release.objects.get(pk=item_id)
        elif item_type == 'artist':
            target = Artist.objects.get(pk=item_id)
        elif item_type == 'label':
            target = Label.objects.get(pk=item_id)
        elif item_type == 'media':
            target = Media.objects.get(pk=item_id)

        if target and uri:
            # the relation is intentionally not saved (saving was disabled,
            # "as this involves heavy issues") - it is only built to read
            # back its service and url
            relation = Relation(content_object=target, url=uri)
            payload = {
                'service': '%s' % relation.service,
                'url': '%s' % relation.url,
            }

    except Exception as e:
        log.warning('%s' % e)

    return json.dumps(payload)
def import_release(self, lr):
    """
    Import (or update) the legacy release `lr`.

    The release is fetched or created by `legacy_id`, its fields are mapped
    from the legacy record, relations (discogs / myspace / wikipedia) are
    stored, the legacy cover image is attached if it can be loaded and the
    legacy tags are assigned.
    """
    print('legacy_id: %s' % lr.id)

    r, created = Release.objects.get_or_create(legacy_id=lr.id)
    if created:
        print('Not here yet -> created')
    else:
        print('found by legacy_id -> use')

    # Release creation/update & mapping
    r.slug = slugify(lr.name)
    r.legacy_id = lr.id

    # Mapping new <> legacy
    r.name = lr.name
    print(u'%s' % r.id)

    if lr.catalognumber:
        r.catalognumber = lr.catalognumber
    if lr.releasetype:
        r.releasetype = lr.releasetype
    if lr.releasestatus:
        r.releasestatus = lr.releasestatus
    if lr.published:
        r.publish_date = lr.published
    if lr.notes:
        r.excerpt = lr.notes
    if lr.totaltracks:
        r.totaltracks = lr.totaltracks
        print('totaltracks: %s' % r.totaltracks)
    if lr.releasecountry and len(lr.releasecountry) == 2:
        r.release_country = lr.releasecountry

    # "relation" mapping
    if lr.discogs_releaseid and lr.discogs_releaseid != 'nf':
        url = 'http://www.discogs.com/release/%s' % lr.discogs_releaseid
        print('discogs_url: %s' % url)
        Relation(content_object=r, url=url).save()

    if lr.myspace_url:
        print('myspace_url: %s' % lr.myspace_url)
        Relation(content_object=r, url=lr.myspace_url).save()

    if lr.wikipedia_url:
        print('wikipedia_url: %s' % lr.wikipedia_url)
        Relation(content_object=r, url=lr.wikipedia_url).save()

    if lr.releasedate:
        print('legacy-date: %s' % lr.releasedate)
        seg = lr.releasedate.split('-')
        print(seg)

        # legacy dates are 'YYYY', 'YYYY-MM' or 'YYYY-MM-DD'; a '00' / '0'
        # month or day is replaced by '01', missing parts are filled with
        # '01'. The year placeholder '0000' is skipped for all three forms
        # (previously only for full dates).
        if 1 <= len(seg) <= 3 and seg[0] != '0000':
            parts = [seg[0]]
            parts += ['01' if s in ('00', '0') else s for s in seg[1:]]
            parts += ['01'] * (3 - len(parts))
            r.releasedate = '-'.join(parts)

        print('new-date: %s' % r.releasedate)

    r.save()

    # cover image - best effort, a missing image must not abort the import
    try:
        img_url = 'http://openbroadcast.ch/static/images/release/%s/original.jpg' % id_to_location(r.legacy_id)
        print(img_url)
        r.main_image = filer_extra.url_to_file(img_url, r.folder)
        r.save()
    except Exception as e:
        print('unable to assign image: %s' % e)

    # Tag Mapping
    ntrs = NtagsReleases.objects.using('legacy').filter(release_id=lr.id)
    for ntr in ntrs:
        print('Tag ID: %s' % ntr.ntag_id)
        try:
            nt = Ntags.objects.using('legacy').get(id=ntr.ntag_id)
            print('Tag Name: %s' % nt.name)
            Tag.objects.add_tag(r, u'"%s"' % nt.name)
        except Exception as e:
            print(e)
def import_release(self, lr):
    """
    Import (or update) the legacy release `lr`.

    The release is fetched or created by `legacy_id`, its fields are mapped
    from the legacy record, relations (discogs / myspace / wikipedia) are
    stored, the legacy cover image is attached if it can be loaded and the
    legacy tags are assigned.
    """
    print('legacy_id: %s' % lr.id)

    r, created = Release.objects.get_or_create(legacy_id=lr.id)
    if created:
        print('Not here yet -> created')
    else:
        print('found by legacy_id -> use')

    # Release creation/update & mapping
    r.slug = slugify(lr.name)
    r.legacy_id = lr.id

    # Mapping new <> legacy
    r.name = lr.name
    print(u'%s' % r.id)

    if lr.catalognumber:
        r.catalognumber = lr.catalognumber
    if lr.releasetype:
        r.releasetype = lr.releasetype
    if lr.releasestatus:
        r.releasestatus = lr.releasestatus
    if lr.published:
        r.publish_date = lr.published
    if lr.notes:
        r.excerpt = lr.notes
    if lr.totaltracks:
        r.totaltracks = lr.totaltracks
        print('totaltracks: %s' % r.totaltracks)
    if lr.releasecountry and len(lr.releasecountry) == 2:
        r.release_country = lr.releasecountry

    # "relation" mapping
    if lr.discogs_releaseid and lr.discogs_releaseid != 'nf':
        url = 'http://www.discogs.com/release/%s' % lr.discogs_releaseid
        print('discogs_url: %s' % url)
        Relation(content_object=r, url=url).save()

    if lr.myspace_url:
        print('myspace_url: %s' % lr.myspace_url)
        Relation(content_object=r, url=lr.myspace_url).save()

    if lr.wikipedia_url:
        print('wikipedia_url: %s' % lr.wikipedia_url)
        Relation(content_object=r, url=lr.wikipedia_url).save()

    if lr.releasedate:
        print('legacy-date: %s' % lr.releasedate)
        seg = lr.releasedate.split('-')
        print(seg)

        # legacy dates are 'YYYY', 'YYYY-MM' or 'YYYY-MM-DD'; a '00' / '0'
        # month or day is replaced by '01', missing parts are filled with
        # '01'. The year placeholder '0000' is skipped for all three forms
        # (previously only for full dates).
        if 1 <= len(seg) <= 3 and seg[0] != '0000':
            parts = [seg[0]]
            parts += ['01' if s in ('00', '0') else s for s in seg[1:]]
            parts += ['01'] * (3 - len(parts))
            r.releasedate = '-'.join(parts)

        print('new-date: %s' % r.releasedate)

    r.save()

    # cover image - best effort, a missing image must not abort the import
    try:
        img_url = 'http://openbroadcast.ch/static/images/release/%s/original.jpg' % id_to_location(
            r.legacy_id)
        print(img_url)
        r.main_image = filer_extra.url_to_file(img_url, r.folder)
        r.save()
    except Exception as e:
        print('unable to assign image: %s' % e)

    # Tag Mapping
    ntrs = NtagsReleases.objects.using('legacy').filter(release_id=lr.id)
    for ntr in ntrs:
        print('Tag ID: %s' % ntr.ntag_id)
        try:
            nt = Ntags.objects.using('legacy').get(id=ntr.ntag_id)
            print('Tag Name: %s' % nt.name)
            Tag.objects.add_tag(r, u'"%s"' % nt.name)
        except Exception as e:
            print(e)
def run(self, legacy_obj):
    """
    Migrate a legacy release to a `Release`.

    The release is fetched or created by `legacy_id`. Only a newly created
    release is populated: plain fields are copied, the legacy release date
    is mapped to `releasedate_approx` and the legacy urls are stored as
    `Relation` objects.

    NOTE(review): the populated object is not saved within this method -
    confirm that this happens elsewhere.
    """
    from alibrary.models import Release, Relation
    import time

    log = logging.getLogger('util.migrator.run')
    log.info('migrate release: %s' % legacy_obj.name)

    obj, created = Release.objects.get_or_create(legacy_id=legacy_obj.id)

    if not created:
        log.info('object found by legacy_id: %s' % obj.pk)
        return

    log.info('object created: %s' % obj.pk)

    # Mapping data - 1-to-1 fields
    obj.name = legacy_obj.name
    obj.created = legacy_obj.created
    obj.updated = legacy_obj.updated

    if legacy_obj.catalognumber:
        log.debug('catalognumber: %s' % legacy_obj.catalognumber)
        obj.catalognumber = legacy_obj.catalognumber

    if legacy_obj.releasetype:
        log.debug('releasetype: %s' % legacy_obj.releasetype)
        obj.releasetype = legacy_obj.releasetype

    if legacy_obj.releasestatus:
        obj.releasestatus = legacy_obj.releasestatus

    if legacy_obj.published:
        obj.publish_date = legacy_obj.published

    if legacy_obj.notes:
        obj.description = legacy_obj.notes

    if legacy_obj.totaltracks:
        obj.totaltracks = legacy_obj.totaltracks

    if legacy_obj.releasecountry and len(legacy_obj.releasecountry) == 2:
        obj.release_country = legacy_obj.releasecountry

    if legacy_obj.releasedate:
        log.debug('legacy-date: %s' % legacy_obj.releasedate)
        date = legacy_obj.releasedate

        # pad 'YYYY' and 'YYYY-MM' to the full 'YYYY-MM-DD' form
        if len(date) == 4:
            date = '%s-00-00' % (date)
        elif len(date) == 7:
            date = '%s-00' % (date)

        if re.match(r'^\d{4}-\d{2}-\d{2}$', date) and date != '0000-00-00':
            # NOTE(review): strptime rejects a '00' month or day, so the
            # padded year-only / year-month values above never pass this
            # check - confirm whether that is intended.
            try:
                time.strptime('%s' % date, '%Y-%m-%d')
                obj.releasedate_approx = '%s' % date
            except Exception as e:
                log.warning('Invalid date! %s' % e)

    # Relation mapping
    if legacy_obj.discogs_releaseid and legacy_obj.discogs_releaseid != 'nf':
        url = 'http://www.discogs.com/release/%s' % legacy_obj.discogs_releaseid
        log.debug('discogs_url: %s' % url)
        Relation(content_object=obj, url=url).save()

    if legacy_obj.mb_releaseid and legacy_obj.mb_releaseid != 'nf':
        url = 'http://musicbrainz.org/release/%s' % legacy_obj.mb_releaseid
        log.debug('mb_releaseid: %s' % url)
        Relation(content_object=obj, url=url).save()

    # legacy fields holding a complete url; 'nf' marks a missing value
    for field in ('myspace_url', 'wikipedia_url', 'facebook_url', 'lastfm_url'):
        url = getattr(legacy_obj, field)
        if url and url != 'nf':
            rel = Relation(content_object=obj, url=url)
            log.debug('url: %s' % rel.url)
            rel.save()

    if legacy_obj.release_url and legacy_obj.release_url != 'nf':
        rel = Relation(content_object=obj, url=legacy_obj.release_url, service='official')
        log.debug('url: %s' % rel.url)
        rel.save()

    if legacy_obj.various_links and len(legacy_obj.various_links) > 10:
        # one candidate per line; entries failing url validation are skipped
        for entry in legacy_obj.various_links.splitlines():
            try:
                validate_url(entry)
                rel = Relation(content_object=obj, url=entry)
                log.debug('url (from various): %s' % rel.url)
                rel.save()
            except ValidationError as e:
                log.debug('skipping invalid url: %s' % e)
def run(self, legacy_obj):
    """
    Migrate a legacy release to a `Release`.

    The release is fetched or created by `legacy_id`. Only a newly created
    release is populated: plain fields are copied, the legacy release date
    is mapped to `releasedate_approx` and the legacy urls are stored as
    `Relation` objects.

    NOTE(review): the populated object is not saved within this method -
    confirm that this happens elsewhere.
    """
    from alibrary.models import Release, Relation
    import time

    log = logging.getLogger('util.migrator.run')
    log.info('migrate release: %s' % legacy_obj.name)

    obj, created = Release.objects.get_or_create(legacy_id=legacy_obj.id)

    if not created:
        log.info('object found by legacy_id: %s' % obj.pk)
        return

    log.info('object created: %s' % obj.pk)

    # Mapping data - 1-to-1 fields
    obj.name = legacy_obj.name
    obj.created = legacy_obj.created
    obj.updated = legacy_obj.updated

    if legacy_obj.catalognumber:
        log.debug('catalognumber: %s' % legacy_obj.catalognumber)
        obj.catalognumber = legacy_obj.catalognumber

    if legacy_obj.releasetype:
        log.debug('releasetype: %s' % legacy_obj.releasetype)
        obj.releasetype = legacy_obj.releasetype

    if legacy_obj.releasestatus:
        obj.releasestatus = legacy_obj.releasestatus

    if legacy_obj.published:
        obj.publish_date = legacy_obj.published

    if legacy_obj.notes:
        obj.description = legacy_obj.notes

    if legacy_obj.totaltracks:
        obj.totaltracks = legacy_obj.totaltracks

    if legacy_obj.releasecountry and len(legacy_obj.releasecountry) == 2:
        obj.release_country = legacy_obj.releasecountry

    if legacy_obj.releasedate:
        log.debug('legacy-date: %s' % legacy_obj.releasedate)
        date = legacy_obj.releasedate

        # pad 'YYYY' and 'YYYY-MM' to the full 'YYYY-MM-DD' form
        if len(date) == 4:
            date = '%s-00-00' % (date)
        elif len(date) == 7:
            date = '%s-00' % (date)

        if re.match(r'^\d{4}-\d{2}-\d{2}$', date) and date != '0000-00-00':
            # NOTE(review): strptime rejects a '00' month or day, so the
            # padded year-only / year-month values above never pass this
            # check - confirm whether that is intended.
            try:
                time.strptime('%s' % date, '%Y-%m-%d')
                obj.releasedate_approx = '%s' % date
            except Exception as e:
                log.warning('Invalid date! %s' % e)

    # Relation mapping
    if legacy_obj.discogs_releaseid and legacy_obj.discogs_releaseid != 'nf':
        url = 'http://www.discogs.com/release/%s' % legacy_obj.discogs_releaseid
        log.debug('discogs_url: %s' % url)
        Relation(content_object=obj, url=url).save()

    if legacy_obj.mb_releaseid and legacy_obj.mb_releaseid != 'nf':
        url = 'http://musicbrainz.org/release/%s' % legacy_obj.mb_releaseid
        log.debug('mb_releaseid: %s' % url)
        Relation(content_object=obj, url=url).save()

    # legacy fields holding a complete url; 'nf' marks a missing value
    for field in ('myspace_url', 'wikipedia_url', 'facebook_url', 'lastfm_url'):
        url = getattr(legacy_obj, field)
        if url and url != 'nf':
            rel = Relation(content_object=obj, url=url)
            log.debug('url: %s' % rel.url)
            rel.save()

    if legacy_obj.release_url and legacy_obj.release_url != 'nf':
        rel = Relation(content_object=obj, url=legacy_obj.release_url, service='official')
        log.debug('url: %s' % rel.url)
        rel.save()

    if legacy_obj.various_links and len(legacy_obj.various_links) > 10:
        # one candidate per line; entries failing url validation are skipped
        for entry in legacy_obj.various_links.splitlines():
            try:
                validate_url(entry)
                rel = Relation(content_object=obj, url=entry)
                log.debug('url (from various): %s' % rel.url)
                rel.save()
            except ValidationError as e:
                log.debug('skipping invalid url: %s' % e)
class ArtistMigrator(Migrator):
    """Migrates legacy artist records to `Artist` objects."""

    def __init__(self):
        # NOTE(review): the base class initialiser is not called here -
        # confirm that `Migrator` does not require it.
        log = logging.getLogger('util.migrator.__init__')

    def run(self, legacy_obj):
        """
        Migrate a legacy artist to an `Artist`.

        The artist is fetched or created by `legacy_id`. Only a newly
        created artist is populated: plain fields are copied, the country
        is resolved, aliases are linked (alias artists are created by name
        if needed) and the legacy urls are stored as `Relation` objects.

        NOTE(review): the populated object is not saved within this method
        - confirm that this happens elsewhere.
        """
        from alibrary.models import Artist, Relation

        log = logging.getLogger('util.migrator.run')
        log.info('migrate artist: %s' % legacy_obj.name)

        obj, created = Artist.objects.get_or_create(legacy_id=legacy_obj.id)

        if not created:
            log.info('object found by legacy_id: %s' % obj.pk)
            return

        log.info('object created: %s' % obj.pk)

        # Mapping data - 1-to-1 fields
        obj.name = legacy_obj.name
        obj.created = legacy_obj.created
        obj.updated = legacy_obj.updated
        obj.published = legacy_obj.published

        if legacy_obj.profile:
            obj.description = legacy_obj.profile

        if legacy_obj.artisttype:
            obj.type = legacy_obj.artisttype

        if legacy_obj.realname:
            obj.real_name = legacy_obj.realname

        if legacy_obj.country:
            log.debug('country: %s' % legacy_obj.country)
            # two-letter values are matched against the iso2 code, anything
            # else against the printable country name
            if len(legacy_obj.country) == 2:
                country_lookup = {'iso2_code': legacy_obj.country}
            else:
                country_lookup = {'printable_name': legacy_obj.country}

            try:
                country = Country.objects.get(**country_lookup)
            except Exception:
                country = None

            if country:
                log.debug('got country: %s' % country.name)
                obj.country = country

        if legacy_obj.aliases:
            log.debug('aliases: %s' % legacy_obj.aliases)
            for alias in legacy_obj.aliases.split(','):
                log.debug('alias: %s' % alias)
                alias_name = alias.strip(' ')

                # e.g. trailing comma - do not create a nameless artist
                if not alias_name:
                    continue

                try:
                    a, c = Artist.objects.get_or_create(name=alias_name)
                    obj.aliases.add(a)
                except Exception:
                    # get_or_create failed - fall back to the first artist
                    # with that name, if there is one
                    try:
                        a = Artist.objects.filter(name=alias_name)[0]
                        obj.aliases.add(a)
                    except Exception as e:
                        log.debug('unable to map alias "%s": %s' % (alias_name, e))

        # Relation mapping
        if legacy_obj.discogs_artistid and legacy_obj.discogs_artistid != 'nf':
            url = 'http://www.discogs.com/artist/%s' % legacy_obj.discogs_artistid
            log.debug('discogs_url: %s' % url)
            Relation(content_object=obj, url=url).save()

        if legacy_obj.mb_artistid and legacy_obj.mb_artistid != 'nf':
            url = 'http://musicbrainz.org/artist/%s' % legacy_obj.mb_artistid
            log.debug('mb_artistid: %s' % url)
            Relation(content_object=obj, url=url).save()

        # legacy fields holding a complete url; 'nf' marks a missing value
        url_fields = (
            'myspace_url',
            'wikipedia_url',
            'facebook_url',
            'lastfm_url',
            'soundcloud_url',
        )
        for field in url_fields:
            url = getattr(legacy_obj, field)
            if url and url != 'nf':
                rel = Relation(content_object=obj, url=url)
                log.debug('url: %s' % rel.url)
                rel.save()

        if legacy_obj.website and legacy_obj.website != 'nf':
            rel = Relation(content_object=obj, url=legacy_obj.website, service='official')
            log.debug('url: %s' % rel.url)
            rel.save()

        if legacy_obj.various_links and len(legacy_obj.various_links) > 10:
            # one candidate per line; entries failing validation are skipped
            for entry in legacy_obj.various_links.splitlines():
                try:
                    validate_url(entry)
                    rel = Relation(content_object=obj, url=entry)
                    log.debug('url (from various): %s' % rel.url)
                    rel.save()
                except ValidationError as e:
                    log.debug('skipping invalid url: %s' % e)
def mb_complete_artist(self, obj, mb_id):
    """
    Complete artist `obj` with data from musicbrainz and discogs.

    Loads the musicbrainz artist `mb_id` (url relations and tags), stores
    the relevant urls as `Relation` objects and - if a discogs url is
    among them - uses discogs for the image, biography, real name, aliases
    and members. Type, disambiguation and tags are taken from musicbrainz.

    `mb_id` is appended to `self.mb_completed`. Returns the saved `obj`.
    """
    log = logging.getLogger('util.importer.mb_complete_artist')
    log.info('complete artist, a: %s | mb_id: %s' % (obj.name, mb_id))

    def _ensure_relation(target, url, service=None):
        # attach `url` to `target` unless a matching relation is found;
        # `service` is only applied to a newly created relation
        try:
            Relation.objects.get(object_id=target.pk, url=url)
        except Exception:
            rel = Relation(content_object=target, url=url)
            if service:
                rel.service = service
            rel.save()

    def _discogs_artist(resource_url):
        # resolve a discogs artist resource to a local artist: the single
        # artist already related to the discogs url, a newly created one if
        # there is none, or None (incomplete data / ambiguous match)
        # TODO: improve! handle duplicates!
        time.sleep(1.1)  # presumably throttles discogs API requests - confirm
        d_result = requests.get(resource_url).json()

        d_url = d_result.get('uri', None)
        d_name = d_result.get('name', None)
        if not (d_url and d_name):
            return None

        l_as = lookup.artist_by_relation_url(d_url)
        if len(l_as) < 1:
            l_a = Artist(name=d_name, biography=d_result.get('profile', None))
            l_a.save()
            Relation(content_object=l_a, url=d_url).save()
            return l_a
        if len(l_as) == 1:
            log.debug('got artist: %s' % l_as[0])
            return l_as[0]
        return None

    self.mb_completed.append(mb_id)

    inc = ('url-rels', 'tags')
    url = 'http://%s/ws/2/artist/%s/?fmt=json&inc=%s' % (
        MUSICBRAINZ_HOST, mb_id, "+".join(inc))

    result = requests.get(url).json()
    self.pp.pprint(result)

    discogs_url = None
    discogs_image = None

    valid_relations = (
        'wikipedia',
        'allmusic',
        'BBC Music page',
        'social network',
        'official homepage',
        'youtube',
        'myspace',
    )

    for relation in result.get('relations', ()):
        if relation['type'] == 'discogs':
            log.debug('got discogs url for artist: %s' % relation['url'])
            discogs_url = relation['url']

        if relation['type'] in valid_relations:
            log.debug('got %s url for artist: %s' % (relation['type'], relation['url']))
            service = 'official' if relation['type'] == 'official homepage' else None
            _ensure_relation(obj, relation['url'], service)

    if discogs_url:
        _ensure_relation(obj, discogs_url)

        # try to get image
        try:
            discogs_image = discogs_image_by_url(discogs_url, 'resource_url')
            log.debug('discogs image located at: %s' % discogs_image)
        except Exception as e:
            log.debug('unable to locate discogs image: %s' % e)

    # try to load & assign image
    if discogs_image:
        try:
            obj.main_image = filer_extra.url_to_file(discogs_image, obj.folder)
            obj.save()
        except Exception:
            log.info('unable to assign discogs image')

    if discogs_url:
        discogs_id = None
        try:
            # TODO: not sure if always working
            discogs_id = discogs_id_by_url(discogs_url)
            log.info('extracted discogs id: %s' % discogs_id)
        except Exception as e:
            log.debug('unable to extract discogs id: %s' % e)

        if discogs_id:
            url = 'http://api.discogs.com/artists/%s' % discogs_id
            dgs_result = requests.get(url).json()
            self.pp.pprint(dgs_result)

            profile = dgs_result.get('profile', None)
            if profile:
                obj.biography = profile

            realname = dgs_result.get('realname', None)
            if realname:
                obj.real_name = realname

            # very hackish part here, just as proof-of-concept
            for alias in dgs_result.get('aliases', ()):
                try:
                    log.debug('got alias: %s' % alias['name'])
                    l_a = _discogs_artist(alias['resource_url'])
                    if l_a:
                        obj.aliases.add(l_a)
                except Exception as e:
                    # best effort - a failing alias must not stop the rest
                    log.warning('unable to process alias: %s' % e)

            # very hackish part here, just as proof-of-concept
            for member in dgs_result.get('members', ()):
                try:
                    log.debug('got member: %s' % member['name'])
                    l_a = _discogs_artist(member['resource_url'])
                    if l_a:
                        ArtistMembership.objects.get_or_create(parent=obj, child=l_a)
                except Exception as e:
                    # best effort - a failing member must not stop the rest
                    log.warning('unable to process member: %s' % e)

    artist_type = result.get('type', None)
    if artist_type:
        log.debug('got type: %s' % (artist_type))
        obj.type = artist_type

    disambiguation = result.get('disambiguation', None)
    if disambiguation:
        log.debug('got disambiguation: %s' % (disambiguation))
        obj.disambiguation = disambiguation

    for tag in result.get('tags', ()):
        log.debug('got tag: %s' % (tag['name']))
        Tag.objects.add_tag(obj, '"%s"' % tag['name'])

    # add mb relation
    mb_url = 'http://musicbrainz.org/artist/%s' % (mb_id)
    _ensure_relation(obj, mb_url)

    obj.save()

    return obj
def mb_complete_release(self, obj, mb_id):
    """
    Complete release `obj` with data from musicbrainz, discogs and the
    cover art archive.

    Loads the musicbrainz release `mb_id` and its release-group, stores the
    relevant urls as `Relation` objects, assigns a cover image (discogs
    release, discogs master, then coverartarchive.org), tags the release
    with discogs styles / genres and musicbrainz tags and maps status,
    country, date and asin.

    Returns the saved `obj`.
    """
    log = logging.getLogger('util.importer.mb_complete_release')
    log.info('complete release, r: %s | mb_id: %s' % (obj.name, mb_id))

    def _ensure_relation(url):
        # attach `url` to the release unless a matching relation is found
        try:
            Relation.objects.get(object_id=obj.pk, url=url)
        except Exception:
            Relation(content_object=obj, url=url).save()

    def _discogs_id(url):
        # the first run of digits in the url, or None if there is none
        try:
            discogs_id = re.findall(r'\d+', url)[0]
        except (IndexError, TypeError):
            return None
        log.info('extracted discogs id: %s' % discogs_id)
        return discogs_id

    def _apply_discogs_meta(api_url):
        # tag the release with the discogs styles / genres and take over
        # the notes as description
        dgs_result = requests.get(api_url).json()

        # `or ()`: both keys may be missing (or null) in the response,
        # iterating `None` raised a TypeError here
        for style in dgs_result.get('styles') or ():
            log.debug('got style: %s' % (style))
            Tag.objects.add_tag(obj, '"%s"' % style)

        for genre in dgs_result.get('genres') or ():
            log.debug('got genre: %s' % (genre))
            Tag.objects.add_tag(obj, '"%s"' % genre)

        notes = dgs_result.get('notes', None)
        if notes:
            obj.description = notes

    inc = ('artists', 'url-rels', 'aliases', 'tags', 'recording-rels',
           'work-rels', 'work-level-rels', 'artist-credits', 'labels',
           'label-rels', 'release-groups')
    url = 'http://%s/ws/2/release/%s/?fmt=json&inc=%s' % (
        MUSICBRAINZ_HOST, mb_id, "+".join(inc))

    result = requests.get(url).json()
    self.pp.pprint(result)

    rg_id = None
    release_group = result.get('release-group', None)
    if release_group:
        rg_id = release_group.get('id', None)
        log.debug('release-group id: %s' % rg_id)

    discogs_url = None
    discogs_master_url = None
    discogs_image = None

    # try to get relations
    for relation in result.get('relations', ()):
        if relation['type'] == 'discogs':
            log.debug('got discogs url for release: %s' % relation['url'])
            discogs_url = relation['url']

        if relation['type'] == 'purchase for download':
            log.debug('got purchase url for release: %s' % relation['url'])
            _ensure_relation(relation['url'])

    if rg_id:
        # try to get discogs master url
        inc = ('url-rels', )
        url = 'http://%s/ws/2/release-group/%s/?fmt=json&inc=%s' % (
            MUSICBRAINZ_HOST, rg_id, "+".join(inc))

        rg_result = requests.get(url).json()
        self.pp.pprint(rg_result)

        # try to get relations from master
        for relation in rg_result.get('relations', ()):
            if relation['type'] == 'discogs':
                log.debug('got discogs master-url for release: %s' % relation['url'])
                discogs_master_url = relation['url']

            if relation['type'] in ('wikipedia', 'lyrics', 'allmusic', 'review'):
                log.debug('got %s url for release: %s' % (relation['type'], relation['url']))
                _ensure_relation(relation['url'])

    if discogs_url:
        _ensure_relation(discogs_url)

        # try to get image
        try:
            discogs_image = discogs_image_by_url(discogs_url, 'resource_url')
            log.debug('discogs image located at: %s' % discogs_image)
        except Exception as e:
            log.debug('unable to locate discogs image: %s' % e)

    if discogs_master_url:
        _ensure_relation(discogs_master_url)

        # try to get image from master
        if not discogs_image:
            try:
                discogs_image = discogs_image_by_url(discogs_master_url, 'resource_url')
                log.debug('discogs image located at: %s' % discogs_image)
            except Exception as e:
                log.debug('unable to locate discogs master image: %s' % e)

    # try to load & assign image
    if discogs_image:
        try:
            obj.main_image = filer_extra.url_to_file(discogs_image, obj.folder)
            obj.save()
        except Exception:
            log.info('unable to assign discogs image')
    else:
        # try at coverartarchive...
        url = 'http://coverartarchive.org/release/%s' % mb_id
        try:
            ca_result = requests.get(url).json()
            ca_url = ca_result['images'][0]['image']
            obj.main_image = filer_extra.url_to_file(ca_url, obj.folder)
            obj.save()
        except Exception as e:
            log.debug('unable to assign coverartarchive image: %s' % e)

    # try to get some additional information from discogs
    if discogs_url:
        discogs_id = _discogs_id(discogs_url)
        if discogs_id:
            _apply_discogs_meta('http://api.discogs.com/releases/%s' % discogs_id)

    if discogs_master_url:
        discogs_id = _discogs_id(discogs_master_url)
        if discogs_id:
            _apply_discogs_meta('http://api.discogs.com/masters/%s' % discogs_id)

    for tag in result.get('tags', ()):
        log.debug('got tag: %s' % (tag['name']))
        Tag.objects.add_tag(obj, '"%s"' % tag['name'])

    status = result.get('status', None)
    if status:
        log.debug('got status: %s' % (status))
        obj.releasestatus = status

    country = result.get('country', None)
    if country:
        log.debug('got country: %s' % (country))
        obj.release_country = country

    date = result.get('date', None)
    if date:
        log.debug('got date: %s' % (date))
        # TODO: rework field
        # pad 'YYYY' and 'YYYY-MM' to the full 'YYYY-MM-DD' form
        if len(date) == 4:
            date = '%s-00-00' % (date)
        elif len(date) == 7:
            date = '%s-00' % (date)

        if re.match(r'^\d{4}-\d{2}-\d{2}$', date) and date != '0000-00-00':
            obj.releasedate_approx = '%s' % date

    asin = result.get('asin', None)
    if asin:
        log.debug('got asin: %s' % (asin))
        obj.asin = asin

    barcode = result.get('barcode', None)
    if barcode:
        # only logged, the barcode is not stored on the release
        log.debug('got barcode: %s' % (barcode))

    # add mb relation
    mb_url = 'http://musicbrainz.org/release/%s' % (mb_id)
    _ensure_relation(mb_url)

    obj.save()

    return obj
def mb_complete_media(self, obj, mb_id, excludes=()):
    """
    Complete a media object with data from the MusicBrainz recording API.

    Loads the recording `mb_id`, links every artist found in its relations
    to `obj` through `MediaExtraartists` (creating artists that are neither
    known locally nor listed in `excludes`), adds the recording's tags to
    `obj` and makes sure `obj` has a relation pointing to the recording's
    MusicBrainz page.

    :param obj: media object to complete (its `name` and `pk` are read).
    :param mb_id: MusicBrainz recording id.
    :param excludes: MusicBrainz artist ids for which no artist is created.
    :return: the passed `obj` (it is not saved here).
    """
    log = logging.getLogger('util.importer.mb_complete_media')
    log.info('complete media, m: %s | mb_id: %s' % (obj.name, mb_id))

    # pause before every API request - presumably rate limiting, TODO confirm
    time.sleep(1.1)

    inc = ('artists', 'url-rels', 'aliases', 'tags', 'recording-rels',
           'artist-rels', 'work-level-rels', 'artist-credits')
    url = 'http://%s/ws/2/recording/%s/?fmt=json&inc=%s' % (
        MUSICBRAINZ_HOST, mb_id, "+".join(inc))

    r = requests.get(url)
    result = r.json()

    # debug output of the raw API result
    separator = '*****************************************************************'
    for _ in range(3):
        print(separator)
    self.pp.pprint(result)
    for _ in range(3):
        print(separator)

    for relation in result.get('relations', ()):

        # only artist relations are mapped
        if 'artist' not in relation:
            continue

        mb_artist = relation['artist']
        print('artist: %s' % mb_artist['name'])
        print('mb_id: %s' % mb_artist['id'])
        print('role: %s' % relation['type'])
        print('')

        time.sleep(0.1)
        l_as = lookup.artist_by_mb_id(mb_artist['id'])
        l_a = None

        if len(l_as) < 1 and mb_artist['id'] not in excludes:
            # unknown artist: create it together with its musicbrainz relation
            self.mb_completed.append(mb_artist['id'])
            l_a = Artist(name=mb_artist['name'])
            l_a.save()
            artist_url = 'http://musicbrainz.org/artist/%s' % mb_artist['id']
            print('musicbrainz_url: %s' % artist_url)
            Relation(content_object=l_a, url=artist_url).save()
            print('artist created')

        if len(l_as) == 1:
            print('got artist!')
            l_a = l_as[0]
            print(l_as[0])

        # more than one local match is ambiguous: l_a stays None and the
        # relation is skipped

        profession = None
        if 'type' in relation:
            profession, created = Profession.objects.get_or_create(
                name=relation['type'])

        if l_a:
            MediaExtraartists.objects.get_or_create(
                artist=l_a, media=obj, profession=profession)
            self.mb_complete_artist(l_a, mb_artist['id'])

    for tag in result.get('tags', ()):
        log.debug('got tag: %s' % (tag['name']))
        Tag.objects.add_tag(obj, '"%s"' % tag['name'])

    # add mb relation - an existence check instead of get() + bare except,
    # so already duplicated relations do not trigger yet another insert
    mb_url = 'http://musicbrainz.org/recording/%s' % (mb_id)
    if not Relation.objects.filter(object_id=obj.pk, url=mb_url).exists():
        log.debug('relation not here yet, add it: %s' % (mb_url))
        Relation(content_object=obj, url=mb_url).save()

    return obj
def mb_complete_media_task(obj, mb_id, mb_release_id, excludes=()):
    """
    Complete a media object with MusicBrainz recording and release data.

    Loads the recording `mb_id` and the release `mb_release_id`. The release
    track listing is used to derive the track number (offset by the track
    counts of preceding discs) and the disc index of the recording. Artists
    found in the recording's relations are linked to `obj` through
    `MediaExtraartists` and completed via `mb_complete_artist_task`
    (through `.delay()` when `USE_CELERYD` is set). Finally the recording's
    tags and a relation to its MusicBrainz page are added.

    :param obj: media object to complete (its `name` and `pk` are read).
    :param mb_id: MusicBrainz recording id.
    :param mb_release_id: MusicBrainz release id used for the track mapping.
    :param excludes: MusicBrainz artist ids for which no artist is created.
    :return: the passed `obj` (it is not saved here).
    """
    log = logging.getLogger('util.importer.mb_complete_media')
    log.info('complete media, m: %s | mb_id: %s' % (obj.name, mb_id))

    # pause before every API request - presumably rate limiting, TODO confirm
    time.sleep(1.1)

    separator = '*****************************************************************'

    inc = ('artists', 'url-rels', 'aliases', 'tags', 'recording-rels',
           'artist-rels', 'work-level-rels', 'artist-credits')
    url = 'http://%s/ws/2/recording/%s/?fmt=json&inc=%s' % (
        MUSICBRAINZ_HOST, mb_id, "+".join(inc))

    r = requests.get(url)
    result = r.json()

    print(separator)
    print(url)
    print(separator)

    # get release based information (to map track- and disc-number)
    inc = ('recordings',)
    url = 'http://%s/ws/2/release/%s/?fmt=json&inc=%s' % (
        MUSICBRAINZ_HOST, mb_release_id, "+".join(inc))

    r = requests.get(url)
    result_release = r.json()

    print(separator)
    print(url)
    print(separator)
    print(result)
    print('')
    print(result_release)
    print(separator)

    if DEBUG_WAIT:
        raw_input("Press Enter to continue...")

    # loop release recordings, trying to get our track...
    media_offset = 0
    for disc_index, disc in enumerate(result_release.get('media', ())):
        for track in disc['tracks']:
            if track['recording']['id'] != mb_id:
                continue
            try:
                obj.tracknumber = int(media_offset) + int(track['number'])
            except (TypeError, ValueError):
                # track numbers that are not plain integers are left unmapped
                pass
            # NOTE(review): 'mediamumber' looks like a typo of 'medianumber';
            # kept as found because the model is not visible here - confirm.
            obj.mediamumber = disc_index
        media_offset += int(disc['track-count'])

    if DEBUG_WAIT:
        raw_input("Press Enter to continue...")

    for relation in result.get('relations', ()):

        # only artist relations are mapped
        if 'artist' not in relation:
            continue

        mb_artist = relation['artist']
        print('artist: %s' % mb_artist['name'])
        print('mb_id: %s' % mb_artist['id'])
        print('role: %s' % relation['type'])
        print('')

        time.sleep(0.1)
        l_as = lookup.artist_by_mb_id(mb_artist['id'])
        l_a = None

        if len(l_as) < 1 and mb_artist['id'] not in excludes:
            # unknown artist: create it together with its musicbrainz relation
            l_a = Artist(name=mb_artist['name'])
            l_a.save()
            artist_url = 'http://musicbrainz.org/artist/%s' % mb_artist['id']
            print('musicbrainz_url: %s' % artist_url)
            Relation(content_object=l_a, url=artist_url).save()
            print('artist created')

        if len(l_as) == 1:
            print('got artist!')
            l_a = l_as[0]
            print(l_as[0])

        profession = None
        if 'type' in relation:
            profession, created = Profession.objects.get_or_create(
                name=relation['type'])

        if l_a:
            MediaExtraartists.objects.get_or_create(
                artist=l_a, media=obj, profession=profession)
            if USE_CELERYD:
                mb_complete_artist_task.delay(l_a, mb_artist['id'])
            else:
                mb_complete_artist_task(l_a, mb_artist['id'])

    for tag in result.get('tags', ()):
        log.debug('got tag: %s' % (tag['name']))
        Tag.objects.add_tag(obj, '"%s"' % tag['name'])

    # add mb relation - an existence check instead of get() + bare except,
    # so already duplicated relations do not trigger yet another insert
    mb_url = 'http://musicbrainz.org/recording/%s' % (mb_id)
    if not Relation.objects.filter(object_id=obj.pk, url=mb_url).exists():
        log.debug('relation not here yet, add it: %s' % (mb_url))
        Relation(content_object=obj, url=mb_url).save()

    return obj
def release_fetch_media_mb_ids(obj):
    """
    We have the situation that often *Releases* have an `mb_id` assigned
    (through manual editing process or when assigned during import) - but
    not the containing Media items.

     - takes the media objects of `obj` without a musicbrainz relation
     - loads the release's track listing from the MusicBrainz API
     - for every media object: if `tracknumber` and `name` are equal to an
       API track, the recording's `mb_id` is added as a Relation

    :param obj: release holding a musicbrainz relation.
    :return: list of recording ids for which a relation was created, or
        `None` if there was nothing to process or the API data could not
        be loaded.
    """
    mb_id = uuid_by_object(obj, service='musicbrainz')
    mb_ids_added = []

    log.debug('processing: {} - id:{} - mb_id:{}'.format(obj, obj.pk, mb_id))

    # get media objects without musicbrainz relation
    qs_media = obj.media_release.exclude(relations__service='musicbrainz')

    if not qs_media.exists():
        log.debug('no media objects without mb relation')
        return

    log.debug('{} media objects without mb relation'.format(qs_media.count()))

    # load release + relations from mb api
    url = 'http://{host}/ws/2/release/{mb_id}/?fmt=json&inc=recordings'.format(
        host=MUSICBRAINZ_HOST,
        mb_id=mb_id
    )

    try:
        r = requests.get(url)
        _data = r.json()
        log.debug('successfully loaded data from {}'.format(url))
    except Exception:
        log.warning('unable to load data from {}'.format(url))
        return

    if 'media' not in _data:
        log.warning('unable to load media {}'.format(url))
        return

    # map tracknumbers from lp format A1, A2, B1, B2 etc to 1, 2, 3, 4 ...
    # (only the very first track is inspected to detect the lp format)
    try:
        if _data['media'][0]['tracks'][0]['number'][0:1] == 'A':
            for media in _data['media']:
                for _track_number, track in enumerate(media['tracks'], 1):
                    track['number'] = _track_number
    except (IndexError, KeyError):
        pass

    # map tracknumbers for multi-disc releases
    _tracks = {}
    for m in _data['media']:
        for t in m['tracks']:
            try:
                t_no = int(t['number']) + m['track-offset']
                _tracks[t_no] = t
            except ValueError as e:
                log.warning('unable to map tracknumber "{}" - {}'.format(t['number'], e))

    for m in qs_media:

        log.debug('looking up results for #{} - {}'.format(m.tracknumber, m))

        try:
            _track = _tracks[m.tracknumber]
        except KeyError:
            # a single unmatched track number must not abort the whole
            # release (and drop the ids collected so far) - skip this one
            log.debug('no track in results for #{}'.format(m.tracknumber))
            continue

        log.debug('track from results #{} - {}'.format(m.tracknumber, _track['title']))

        # check if titles match
        if unify_name(m.name).lower() != unify_name(_track['title']).lower():
            log.info('no match: "{}" <> "{}"'.format(m.name, _track['title']))
            continue

        log.info('got id for match: {}'.format(_track['recording']['id']))
        mb_recording_id = _track['recording']['id']

        # add mb relation
        if mb_recording_id:
            mb_url = 'http://musicbrainz.org/recording/{mb_id}'.format(
                mb_id=mb_recording_id
            )
            try:
                Relation.objects.get(object_id=m.pk, url=mb_url)
            except Relation.DoesNotExist:
                log.debug('relation not here yet, so add it: {}'.format(mb_url))
                Relation(content_object=m, url=mb_url).save()
                mb_ids_added.append(mb_recording_id)

    return mb_ids_added
def mb_complete_release(self, obj, mb_id):
    """
    Complete a release object with data from MusicBrainz, Discogs and the
    Cover Art Archive.

    Steps, in order:
     - load the release `mb_id` (and its release-group) from MusicBrainz
       and store the interesting url relations on `obj`
     - try to assign a main image: discogs release, then discogs master,
       falling back to coverartarchive.org
     - add styles / genres (as tags) and notes (as description) from the
       discogs release and master
     - copy tags, status, country, date and asin from the MusicBrainz result
     - make sure `obj` has a relation to the release's MusicBrainz page

    :param obj: release object to complete (it is saved before returning).
    :param mb_id: MusicBrainz release id.
    :return: the passed `obj`.
    """
    log = logging.getLogger('util.importer.mb_complete_release')
    log.info('complete release, r: %s | mb_id: %s' % (obj.name, mb_id))

    def ensure_relation(url):
        # create a relation on obj unless one with this url already exists
        if not Relation.objects.filter(object_id=obj.pk, url=url).exists():
            Relation(content_object=obj, url=url).save()

    def apply_discogs_data(source_url, endpoint):
        # add styles, genres and notes of a discogs release/master to obj
        try:
            discogs_id = re.findall(r'\d+', source_url)[0]
        except (IndexError, TypeError):
            # no numeric id in the url - nothing to look up
            return
        log.info('extracted discogs id: %s' % discogs_id)

        r = requests.get('http://api.discogs.com/%s/%s' % (endpoint, discogs_id))
        dgs_result = r.json()

        # 'styles' / 'genres' may be missing from the result: fall back to
        # an empty sequence instead of iterating over None
        for style in dgs_result.get('styles') or ():
            log.debug('got style: %s' % (style))
            Tag.objects.add_tag(obj, '"%s"' % style)

        for genre in dgs_result.get('genres') or ():
            log.debug('got genre: %s' % (genre))
            Tag.objects.add_tag(obj, '"%s"' % genre)

        notes = dgs_result.get('notes', None)
        if notes:
            obj.description = notes

    inc = ('artists', 'url-rels', 'aliases', 'tags', 'recording-rels',
           'work-rels', 'work-level-rels', 'artist-credits', 'labels',
           'label-rels', 'release-groups')
    url = 'http://%s/ws/2/release/%s/?fmt=json&inc=%s' % (
        MUSICBRAINZ_HOST, mb_id, "+".join(inc))

    r = requests.get(url)
    result = r.json()

    self.pp.pprint(result)

    rg_id = None
    release_group = result.get('release-group', None)
    if release_group:
        rg_id = release_group.get('id', None)
    log.debug('release-group id: %s' % rg_id)

    discogs_url = None
    discogs_master_url = None
    discogs_image = None

    # try to get relations
    for relation in result.get('relations', ()):

        if relation['type'] == 'discogs':
            log.debug('got discogs url for release: %s' % relation['url'])
            discogs_url = relation['url']

        if relation['type'] == 'purchase for download':
            log.debug('got purchase url for release: %s' % relation['url'])
            ensure_relation(relation['url'])

    if rg_id:
        # try to get discogs master url
        inc = ('url-rels',)
        url = 'http://%s/ws/2/release-group/%s/?fmt=json&inc=%s' % (
            MUSICBRAINZ_HOST, rg_id, "+".join(inc))

        r = requests.get(url)
        rg_result = r.json()

        print("*******************************************************************")
        self.pp.pprint(rg_result)

        # try to get relations from master
        for relation in rg_result.get('relations', ()):

            if relation['type'] == 'discogs':
                log.debug('got discogs master-url for release: %s' % relation['url'])
                discogs_master_url = relation['url']

            if relation['type'] in ('wikipedia', 'lyrics', 'allmusic', 'review'):
                log.debug('got %s url for release: %s' % (relation['type'], relation['url']))
                ensure_relation(relation['url'])

    if discogs_url:
        ensure_relation(discogs_url)

        # try to get image
        try:
            discogs_image = discogs_image_by_url(discogs_url, 'resource_url')
            log.debug('discogs image located at: %s' % discogs_image)
        except Exception as e:
            # best effort: a missing image must not stop the import
            log.debug('unable to locate discogs image: %s' % e)

    if discogs_master_url:
        ensure_relation(discogs_master_url)

        # try to get image from master
        if not discogs_image:
            try:
                discogs_image = discogs_image_by_url(discogs_master_url, 'resource_url')
                log.debug('discogs image located at: %s' % discogs_image)
            except Exception as e:
                log.debug('unable to locate discogs master image: %s' % e)

    # try to load & assign image
    if discogs_image:
        try:
            img = filer_extra.url_to_file(discogs_image, obj.folder)
            obj.main_image = img
            obj.save()
        except Exception:
            log.info('unable to assign discogs image')
    else:
        # try at coverartarchive...
        url = 'http://coverartarchive.org/release/%s' % mb_id
        try:
            r = requests.get(url)
            ca_result = r.json()
            ca_url = ca_result['images'][0]['image']
            img = filer_extra.url_to_file(ca_url, obj.folder)
            obj.main_image = img
            obj.save()
        except Exception:
            log.info('unable to assign coverartarchive image')

    # try to get some additional information from discogs
    if discogs_url:
        apply_discogs_data(discogs_url, 'releases')

    if discogs_master_url:
        apply_discogs_data(discogs_master_url, 'masters')

    for tag in result.get('tags', ()):
        log.debug('got tag: %s' % (tag['name']))
        Tag.objects.add_tag(obj, '"%s"' % tag['name'])

    status = result.get('status', None)
    if status:
        log.debug('got status: %s' % (status))
        obj.releasestatus = status

    country = result.get('country', None)
    if country:
        log.debug('got country: %s' % (country))
        obj.release_country = country

    date = result.get('date', None)
    if date:
        log.debug('got date: %s' % (date))
        # TODO: rework field
        # pad partial dates (YYYY / YYYY-MM) with zeroes to YYYY-MM-DD
        if len(date) == 4:
            date = '%s-00-00' % (date)
        elif len(date) == 7:
            date = '%s-00' % (date)

        if re.match(r'^\d{4}-\d{2}-\d{2}$', date):
            obj.releasedate_approx = '%s' % date

    asin = result.get('asin', None)
    if asin:
        log.debug('got asin: %s' % (asin))
        obj.asin = asin

    barcode = result.get('barcode', None)
    if barcode:
        # the barcode is only logged, it is not stored on obj
        log.debug('got barcode: %s' % (barcode))

    # add mb relation
    mb_url = 'http://musicbrainz.org/release/%s' % (mb_id)
    if not Relation.objects.filter(object_id=obj.pk, url=mb_url).exists():
        log.debug('relation not here yet, add it: %s' % (mb_url))
        Relation(content_object=obj, url=mb_url).save()

    obj.save()

    return obj
def run(self, legacy_obj): from alibrary.models import Media, Relation status = 1 log = logging.getLogger('util.migrator.run') log.info('migrate media: %s' % legacy_obj.name) obj, created = Media.objects.get_or_create(legacy_id=legacy_obj.id) if created: log.info('object created: %s' % obj.pk) else: log.info('object found by legacy_id: %s' % obj.pk) if created: """ Mapping data 1-to-1 fields """ obj.name = legacy_obj.name obj.created = legacy_obj.created obj.updated = legacy_obj.updated if legacy_obj.published: obj.publish_date = legacy_obj.published if legacy_obj.tracknumber: log.debug('tracknumber: %s' % legacy_obj.tracknumber) try: obj.tracknumber = int(legacy_obj.tracknumber) except: pass """ Relation mapping """ if legacy_obj.mb_trackid and legacy_obj.mb_trackid != 'nf': url = 'http://musicbrainz.org/recording/%s' % legacy_obj.mb_trackid log.debug('mb_trackid: %s' % url) rel = Relation(content_object=obj, url=url) rel.save() if legacy_obj.soundcloud_url and legacy_obj.soundcloud_url != 'nf': rel = Relation(content_object=obj, url=legacy_obj.soundcloud_url) log.debug('url: %s' % rel.url) rel.save() if legacy_obj.youtube_url and legacy_obj.youtube_url != 'nf': rel = Relation(content_object=obj, url=legacy_obj.youtube_url) log.debug('url: %s' % rel.url) rel.save() """ Release Mapping """ legacy_items = MediasReleases.objects.using('legacy').filter( media=legacy_obj) # r.tags.clear() for legacy_item in legacy_items: log.debug('mapping release') item, s = get_release_by_legacy_object(legacy_item.release) obj.release = item """ Artist Mapping """ legacy_items = ArtistsMedias.objects.using('legacy').filter( media=legacy_obj) # r.tags.clear() for legacy_item in legacy_items: log.debug('mapping artist') item, s = get_artist_by_legacy_object(legacy_item.artist) obj.artist = item """ User mapping """ try: legacy_user = Users.objects.using('legacy').get( id=legacy_obj.user_id) log.debug('mapping user') item, s = get_user_by_legacy_object(legacy_user) if item: obj.creator 
= item except: pass """ Tag Mapping """ nts = NtagsMedias.objects.using('legacy').filter( media_id=legacy_obj.id) for nt in nts: try: t = Ntags.objects.using('legacy').get(id=nt.ntag_id) log.debug('tag for object: %s' % t.name) Tag.objects.add_tag(obj, u'"%s"' % t.name[:30]) except Exception, e: print e """ Get path to file """ try: base_path = '%s%s' % (LEGACY_STORAGE_ROOT, 'media/') media_dir = '%s%s/' % (base_path, id_to_location( obj.legacy_id)) if legacy_obj.has_flac_default == 1: print 'FLAC' media_path = '%sdefault.flac' % (media_dir) else: print 'MP3' media_path = '%sdefault.mp3' % (media_dir) print '******************************************************' print media_dir print media_path if os.path.isfile(media_path): print 'file exists!' """""" folder = "private/%s/" % (obj.uuid.replace('-', '/')) filename, extension = os.path.splitext(media_path) dst = os.path.join(folder, "master%s" % extension.lower()) print dst try: os.makedirs("%s/%s" % (MEDIA_ROOT, folder)) shutil.copy(media_path, "%s/%s" % (MEDIA_ROOT, dst)) obj.master = dst obj.save() except Exception, e: print e else:
def mb_complete_artist(self, obj, mb_id):
    """
    Complete an artist object with data from MusicBrainz and Discogs.

    Steps, in order:
     - load the artist `mb_id` from MusicBrainz and store the url relations
       listed in `valid_relations` (plus the discogs url) on `obj`
     - try to assign a main image from discogs
     - copy profile and real name from discogs and map the discogs aliases
       and members to local artists (created when unknown)
     - copy type, disambiguation and tags from the MusicBrainz result
     - make sure `obj` has a relation to the artist's MusicBrainz page

    :param obj: artist object to complete (it is saved before returning).
    :param mb_id: MusicBrainz artist id; appended to `self.mb_completed`.
    :return: the passed `obj`.
    """
    log = logging.getLogger('util.importer.mb_complete_artist')
    log.info('complete artist, a: %s | mb_id: %s' % (obj.name, mb_id))

    self.mb_completed.append(mb_id)

    def ensure_relation(url, service=None):
        # create a relation on obj unless one with this url already exists
        if Relation.objects.filter(object_id=obj.pk, url=url).exists():
            return
        rel = Relation(content_object=obj, url=url)
        if service:
            rel.service = service
        rel.save()

    def artist_by_discogs_resource(resource_url):
        # resolve a discogs artist resource to a local artist; the artist is
        # created when unknown, None is returned when the match is ambiguous
        # TODO: improve! handle duplicates!
        time.sleep(1.1)
        data = requests.get(resource_url).json()
        discogs_uri = data.get('uri', None)
        name = data.get('name', None)
        if not (discogs_uri and name):
            return None

        l_as = lookup.artist_by_relation_url(discogs_uri)
        if len(l_as) < 1:
            artist = Artist(name=name, biography=data.get('profile', None))
            artist.save()
            Relation(content_object=artist, url=discogs_uri).save()
            return artist
        if len(l_as) == 1:
            print(l_as[0])
            return l_as[0]
        return None

    inc = ('url-rels', 'tags')
    url = 'http://%s/ws/2/artist/%s/?fmt=json&inc=%s' % (
        MUSICBRAINZ_HOST, mb_id, "+".join(inc))

    r = requests.get(url)
    result = r.json()

    print('#########################################################################')
    self.pp.pprint(result)

    discogs_url = None
    discogs_image = None

    valid_relations = ('wikipedia', 'allmusic', 'BBC Music page',
                       'social network', 'official homepage', 'youtube',
                       'myspace',)

    for relation in result.get('relations', ()):

        if relation['type'] == 'discogs':
            log.debug('got discogs url for artist: %s' % relation['url'])
            discogs_url = relation['url']

        if relation['type'] in valid_relations:
            log.debug('got %s url for artist: %s' % (relation['type'], relation['url']))
            if relation['type'] == 'official homepage':
                ensure_relation(relation['url'], service='official')
            else:
                ensure_relation(relation['url'])

    if discogs_url:
        ensure_relation(discogs_url)

        # try to get image
        try:
            discogs_image = discogs_image_by_url(discogs_url, 'resource_url')
            log.debug('discogs image located at: %s' % discogs_image)
        except Exception as e:
            # best effort: a missing image must not stop the import
            log.debug('unable to locate discogs image: %s' % e)

    # try to load & assign image
    if discogs_image:
        try:
            img = filer_extra.url_to_file(discogs_image, obj.folder)
            obj.main_image = img
            obj.save()
        except Exception:
            log.info('unable to assign discogs image')

    if discogs_url:

        discogs_id = None
        try:
            # TODO: not sure if always working
            discogs_id = discogs_id_by_url(discogs_url)
            log.info('extracted discogs id: %s' % discogs_id)
        except Exception as e:
            log.debug('unable to extract discogs id: %s' % e)

        if discogs_id:
            url = 'http://api.discogs.com/artists/%s' % discogs_id
            r = requests.get(url)
            dgs_result = r.json()

            self.pp.pprint(dgs_result)

            profile = dgs_result.get('profile', None)
            if profile:
                obj.biography = profile

            realname = dgs_result.get('realname', None)
            if realname:
                obj.real_name = realname

            # verry hackish part here, just as proof-of-concept
            for alias in dgs_result.get('aliases', ()):
                try:
                    log.debug('got alias: %s' % alias['name'])
                    l_a = artist_by_discogs_resource(alias['resource_url'])
                    if l_a:
                        obj.aliases.add(l_a)
                except Exception as e:
                    # keep going with the remaining aliases, but leave a trace
                    log.warning('unable to process alias: %s' % e)

            # verry hackish part here, just as proof-of-concept
            for member in dgs_result.get('members', ()):
                try:
                    log.debug('got member: %s' % member['name'])
                    l_a = artist_by_discogs_resource(member['resource_url'])
                    if l_a:
                        ArtistMembership.objects.get_or_create(parent=obj, child=l_a)
                except Exception as e:
                    # keep going with the remaining members, but leave a trace
                    log.warning('unable to process member: %s' % e)

    artist_type = result.get('type', None)
    if artist_type:
        log.debug('got type: %s' % (artist_type))
        obj.type = artist_type

    disambiguation = result.get('disambiguation', None)
    if disambiguation:
        log.debug('got disambiguation: %s' % (disambiguation))
        obj.disambiguation = disambiguation

    for tag in result.get('tags', ()):
        log.debug('got tag: %s' % (tag['name']))
        Tag.objects.add_tag(obj, '"%s"' % tag['name'])

    # add mb relation
    mb_url = 'http://musicbrainz.org/artist/%s' % (mb_id)
    if not Relation.objects.filter(object_id=obj.pk, url=mb_url).exists():
        log.debug('relation not here yet, add it: %s' % (mb_url))
        Relation(content_object=obj, url=mb_url).save()

    obj.save()

    return obj
class LabelMigrator(Migrator):
    """Migrator mapping a legacy label record onto an alibrary `Label`."""

    def __init__(self):
        logging.getLogger('util.migrator.__init__')

    def run(self, legacy_obj):
        """
        Get or create the `Label` for `legacy_obj` (matched by `legacy_id`).

        Only for a newly created label the legacy fields, the distributor
        and the url relations are mapped.

        NOTE(review): the visible body neither saves `obj` nor returns
        anything - confirm against the complete file.
        """
        from alibrary.models import Label, Relation, Distributor, DistributorLabel

        status = 1

        log = logging.getLogger('util.migrator.run')
        log.info('migrate release: %s' % legacy_obj.name)

        obj, created = Label.objects.get_or_create(legacy_id=legacy_obj.id)

        if created:
            log.info('object created: %s' % obj.pk)
        else:
            log.info('object found by legacy_id: %s' % obj.pk)

        if created:

            # --- mapping data, 1-to-1 fields ---
            obj.name = legacy_obj.name
            obj.created = legacy_obj.created
            obj.updated = legacy_obj.updated

            if legacy_obj.published:
                obj.published = legacy_obj.published

            if legacy_obj.label_type:
                obj.type = legacy_obj.label_type

            if legacy_obj.label_code:
                log.debug('label_code: %s' % legacy_obj.label_code)
                obj.labelcode = legacy_obj.label_code[:200]

            # the description is the profile, followed by the notes if any
            if legacy_obj.profile:
                if legacy_obj.notes:
                    obj.description = "%s\n\n%s" % (legacy_obj.profile, legacy_obj.notes)
                else:
                    obj.description = legacy_obj.profile

            if legacy_obj.address:
                obj.address = legacy_obj.address

            # the contact is only kept when it looks like an e-mail address
            if legacy_obj.contact:
                log.debug('contact: %s' % legacy_obj.contact)
                if email_re.match(legacy_obj.contact):
                    obj.email = legacy_obj.contact

            if legacy_obj.country:
                log.debug('country: %s' % legacy_obj.country)
                # two characters are looked up as iso code, anything else
                # as printable name
                if len(legacy_obj.country) == 2:
                    country_filter = {'iso2_code': legacy_obj.country}
                else:
                    country_filter = {'printable_name': legacy_obj.country}

                country = None
                try:
                    country = Country.objects.get(**country_filter)
                except Exception:
                    pass

                if country:
                    log.debug('got country: %s' % country.name)
                    obj.country = country

            if legacy_obj.distributor:
                log.debug('distributor: %s' % legacy_obj.distributor)
                d, c = Distributor.objects.get_or_create(name=legacy_obj.distributor)
                DistributorLabel(distributor=d, label=obj).save()

            # --- relation mapping ('nf' marks an empty legacy value) ---
            if legacy_obj.discogs_labelid and legacy_obj.discogs_labelid != 'nf':
                url = 'http://www.discogs.com/label/%s' % legacy_obj.discogs_labelid
                log.debug('discogs_url: %s' % url)
                Relation(content_object=obj, url=url).save()

            if legacy_obj.mb_labelid and legacy_obj.mb_labelid != 'nf':
                url = 'http://musicbrainz.org/label/%s' % legacy_obj.mb_labelid
                log.debug('mb_labelid: %s' % url)
                Relation(content_object=obj, url=url).save()

            # plain url fields, stored without an explicit service
            for field in ('facebook_url', 'wikipedia_url', 'soundcloud_url', 'lastfm_url'):
                legacy_url = getattr(legacy_obj, field)
                if legacy_url and legacy_url != 'nf':
                    rel = Relation(content_object=obj, url=legacy_url)
                    log.debug('url: %s' % rel.url)
                    rel.save()

            if legacy_obj.website and legacy_obj.website != 'nf':
                rel = Relation(content_object=obj, url=legacy_obj.website, service='official')
                log.debug('url: %s' % rel.url)
                rel.save()

            # free-text links: one url per line, invalid lines are printed
            various_links = legacy_obj.various_links
            if various_links and len(various_links) > 10:
                for entry in various_links.splitlines():
                    try:
                        validate_url(entry)
                        rel = Relation(content_object=obj, url=entry)
                        log.debug('url (from various): %s' % rel.url)
                        rel.save()
                    except ValidationError as e:
                        print(e)
def run(self, legacy_obj): from alibrary.models import Media, Relation status = 1 log = logging.getLogger('util.migrator.run') log.info('migrate media: %s' % legacy_obj.name) obj, created = Media.objects.get_or_create(legacy_id=legacy_obj.id) if created: log.info('object created: %s' % obj.pk) else: log.info('object found by legacy_id: %s' % obj.pk) if created: """ Mapping data 1-to-1 fields """ obj.name = legacy_obj.name obj.created = legacy_obj.created obj.updated = legacy_obj.updated if legacy_obj.published: obj.publish_date = legacy_obj.published if legacy_obj.tracknumber: log.debug('tracknumber: %s' % legacy_obj.tracknumber) try: obj.tracknumber = int(legacy_obj.tracknumber) except: pass """ Relation mapping """ if legacy_obj.mb_trackid and legacy_obj.mb_trackid != 'nf': url = 'http://musicbrainz.org/recording/%s' % legacy_obj.mb_trackid log.debug('mb_trackid: %s' % url) rel = Relation(content_object=obj, url=url) rel.save() if legacy_obj.soundcloud_url and legacy_obj.soundcloud_url != 'nf': rel = Relation(content_object=obj, url=legacy_obj.soundcloud_url) log.debug('url: %s' % rel.url) rel.save() if legacy_obj.youtube_url and legacy_obj.youtube_url != 'nf': rel = Relation(content_object=obj, url=legacy_obj.youtube_url) log.debug('url: %s' % rel.url) rel.save() """ Release Mapping """ legacy_items = MediasReleases.objects.using('legacy').filter(media=legacy_obj) # r.tags.clear() for legacy_item in legacy_items: log.debug('mapping release') item, s = get_release_by_legacy_object(legacy_item.release) obj.release = item """ Artist Mapping """ legacy_items = ArtistsMedias.objects.using('legacy').filter(media=legacy_obj) # r.tags.clear() for legacy_item in legacy_items: log.debug('mapping artist') item, s = get_artist_by_legacy_object(legacy_item.artist) obj.artist = item """ User mapping """ try: legacy_user = Users.objects.using('legacy').get(id=legacy_obj.user_id) log.debug('mapping user') item, s = get_user_by_legacy_object(legacy_user) if item: obj.creator = 
item except: pass """ Tag Mapping """ nts = NtagsMedias.objects.using('legacy').filter(media_id=legacy_obj.id) for nt in nts: try: t = Ntags.objects.using('legacy').get(id=nt.ntag_id) log.debug('tag for object: %s' % t.name) Tag.objects.add_tag(obj, u'"%s"' % t.name[:30]) except Exception, e: print e """ Get path to file """ try: base_path = '%s%s' % (LEGACY_STORAGE_ROOT, 'media/') media_dir = '%s%s/' % (base_path, id_to_location(obj.legacy_id)) if legacy_obj.has_flac_default == 1: print 'FLAC' media_path = '%sdefault.flac' % (media_dir) else: print 'MP3' media_path = '%sdefault.mp3' % (media_dir) print '******************************************************' print media_dir print media_path if os.path.isfile(media_path): print 'file exists!' """""" folder = "private/%s/" % (obj.uuid.replace('-', '/')) filename, extension = os.path.splitext(media_path) dst = os.path.join(folder, "master%s" % extension.lower()) print dst try: os.makedirs("%s/%s" % (MEDIA_ROOT, folder)) shutil.copy(media_path, "%s/%s" % (MEDIA_ROOT, dst)) obj.master = dst obj.save() except Exception, e: print e else:
def mb_complete_media(self, obj, mb_id, excludes=()):
    """
    Complete a media object with data from the MusicBrainz recording API.

    Loads the recording `mb_id`, links every artist found in its relations
    to `obj` through `MediaExtraartists` (creating artists that are neither
    known locally nor listed in `excludes`), adds the recording's tags to
    `obj` and makes sure `obj` has a relation pointing to the recording's
    MusicBrainz page.

    :param obj: media object to complete (its `name` and `pk` are read).
    :param mb_id: MusicBrainz recording id.
    :param excludes: MusicBrainz artist ids for which no artist is created.
    :return: the passed `obj` (it is not saved here).
    """
    log = logging.getLogger('util.importer.mb_complete_media')
    log.info('complete media, m: %s | mb_id: %s' % (obj.name, mb_id))

    # pause before every API request - presumably rate limiting, TODO confirm
    time.sleep(1.1)

    inc = ('artists', 'url-rels', 'aliases', 'tags', 'recording-rels',
           'artist-rels', 'work-level-rels', 'artist-credits')
    url = 'http://%s/ws/2/recording/%s/?fmt=json&inc=%s' % (
        MUSICBRAINZ_HOST, mb_id, "+".join(inc))

    r = requests.get(url)
    result = r.json()

    # debug output of the raw API result
    separator = '*****************************************************************'
    for _ in range(3):
        print(separator)
    self.pp.pprint(result)
    for _ in range(3):
        print(separator)

    for relation in result.get('relations', ()):

        # only artist relations are mapped
        if 'artist' not in relation:
            continue

        mb_artist = relation['artist']
        print('artist: %s' % mb_artist['name'])
        print('mb_id: %s' % mb_artist['id'])
        print('role: %s' % relation['type'])
        print('')

        time.sleep(0.1)
        l_as = lookup.artist_by_mb_id(mb_artist['id'])
        l_a = None

        if len(l_as) < 1 and mb_artist['id'] not in excludes:
            # unknown artist: create it together with its musicbrainz relation
            self.mb_completed.append(mb_artist['id'])
            l_a = Artist(name=mb_artist['name'])
            l_a.save()
            artist_url = 'http://musicbrainz.org/artist/%s' % mb_artist['id']
            print('musicbrainz_url: %s' % artist_url)
            Relation(content_object=l_a, url=artist_url).save()
            print('artist created')

        if len(l_as) == 1:
            print('got artist!')
            l_a = l_as[0]
            print(l_as[0])

        # more than one local match is ambiguous: l_a stays None and the
        # relation is skipped

        profession = None
        if 'type' in relation:
            profession, created = Profession.objects.get_or_create(
                name=relation['type'])

        if l_a:
            MediaExtraartists.objects.get_or_create(
                artist=l_a, media=obj, profession=profession)
            self.mb_complete_artist(l_a, mb_artist['id'])

    for tag in result.get('tags', ()):
        log.debug('got tag: %s' % (tag['name']))
        Tag.objects.add_tag(obj, '"%s"' % tag['name'])

    # add mb relation - an existence check instead of get() + bare except,
    # so already duplicated relations do not trigger yet another insert
    mb_url = 'http://musicbrainz.org/recording/%s' % (mb_id)
    if not Relation.objects.filter(object_id=obj.pk, url=mb_url).exists():
        log.debug('relation not here yet, add it: %s' % (mb_url))
        Relation(content_object=obj, url=mb_url).save()

    return obj
def release_fetch_media_mb_ids(obj):
    """
    We have the situation that often *Releases* have an `mb_id` assigned
    (through manual editing process or when assigned during import) - but
    not the containing Media items.

     - takes a Release with a `mb_id` containing Tracks without `mb_id`.
     - loads the release (including recordings) from the MusicBrainz api.
     - for every track without musicbrainz relation - if `tracknumber` and
       `name` are equal - the recording `mb_id` is added as a Relation.

    Tracks that cannot be found in the api result (unknown tracknumber) or
    whose title does not match are skipped.

    Returns the list of recording ids for which a Relation was created, or
    `None` if nothing could be processed (no `mb_id`, no media without
    relation, api data could not be loaded).
    """
    mb_id = uuid_by_object(obj, service='musicbrainz')
    mb_ids_added = []

    log.debug('processing: {} - id:{} - mb_id:{}'.format(obj, obj.pk, mb_id))

    # NOTE(review): assumes `uuid_by_object` returns a falsy value if the
    # release has no musicbrainz id - no point in calling the api then.
    if not mb_id:
        log.debug('no mb_id for release')
        return

    # get media objects without musicbrainz relation
    qs_media = obj.media_release.exclude(relations__service='musicbrainz')

    if not qs_media.exists():
        log.debug('no media objects without mb relation')
        return

    log.debug('{} media objects without mb relation'.format(
        qs_media.count()))

    # load release + recordings from mb api
    url = 'http://{host}/ws/2/release/{mb_id}/?fmt=json&inc=recordings'.format(
        host=MUSICBRAINZ_HOST, mb_id=mb_id)

    try:
        r = requests.get(url)
        _data = r.json()
        log.debug('successfully loaded data from {}'.format(url))
    except Exception as e:
        # best effort: any failure while loading just skips this release
        log.warning('unable to load data from {} - {}'.format(url, e))
        return

    if not isinstance(_data, dict) or 'media' not in _data:
        log.warning('unable to load media {}'.format(url))
        return

    # map tracknumbers from lp format A1, A2, B1, B2 etc to 1, 2, 3, 4 ...
    try:
        if _data['media'][0]['tracks'][0]['number'][0:1] == 'A':
            for medium in _data['media']:
                for position, track in enumerate(medium['tracks'], 1):
                    track['number'] = position
    except (IndexError, KeyError):
        pass

    # map tracknumbers for multi-disc releases
    _tracks = {}
    for medium in _data['media']:
        offset = medium.get('track-offset') or 0
        for track in medium.get('tracks') or ():
            number = track.get('number')
            try:
                _tracks[int(number) + offset] = track
            except (TypeError, ValueError) as e:
                log.warning('unable to map tracknumber "{}" - {}'.format(
                    number, e))

    for media in qs_media:

        log.debug('looking up results for #{} - {}'.format(
            media.tracknumber, media))

        try:
            _track = _tracks[media.tracknumber]
        except KeyError:
            # a single unknown tracknumber must not abort the whole run
            # (and must not discard the ids added so far)
            log.debug('no track in results for #{}'.format(
                media.tracknumber))
            continue

        log.debug('track from results #{} - {}'.format(
            media.tracknumber, _track['title']))

        # check if titles match
        if unify_name(media.name).lower() != unify_name(
                _track['title']).lower():
            log.info('no match: "{}" <> "{}"'.format(
                media.name, _track['title']))
            continue

        mb_recording_id = (_track.get('recording') or {}).get('id')
        log.info('got id for match: {}'.format(mb_recording_id))

        if not mb_recording_id:
            continue

        # add mb relation - `exists()` instead of `get()` so that already
        # duplicated relations do not raise `MultipleObjectsReturned`
        mb_url = 'http://musicbrainz.org/recording/{mb_id}'.format(
            mb_id=mb_recording_id)

        if not Relation.objects.filter(
                object_id=media.pk, url=mb_url).exists():
            log.debug('relation not here yet, so add it: {}'.format(mb_url))
            Relation(content_object=media, url=mb_url).save()
            mb_ids_added.append(mb_recording_id)

    return mb_ids_added