def fetchArtistTags(self, artist, maxTagsToFetch, minWeight, retries=3): ''' Retrieve artist tags from LastFM, filtering out those tags that appear bunky (below the specified weight, longer than the maximum allowable distance, self-referential, etc. ''' try: lastfm = pylast.get_lastfm_network(api_key=self.api_key, api_secret=self.api_secret) tags = self.processSeenTags( lastfm.get_artist(artist).get_top_tags(limit=maxTagsToFetch), minWeight) return filter( lambda pair: pair[0].lower().replace('the', '').strip() != artist.replace('the', '').strip(), tags) except Exception, err: if ('no artist' in str(err).lower()): return [] if (retries > 0): common.safeStderr( 'Problem retrieving artist tag information for [' + artist + '], ' + str(retries) + ' retries left: ' + str(err)) time.sleep(5) return self.fetchArtistTags(artist, maxTagsToFetch, minWeight, retries - 1) else: common.safeStderr( 'Problem retrieving artist tag information for [' + artist + '], skipping: ' + str(err))
def updateTags(self, filename, tagPayload): try: mediawrapper = self.getMediawrapper(filename) for bucket in tagPayload: tagPayload[bucket] = self.tagSep.join( tagPayload[bucket][0:self.maxTags[bucket]]) if (isinstance(mediawrapper, ID3)): return self.updateTagsHelperID3(mediawrapper, tagPayload, self.formatFieldMap['id3']) elif (isinstance(mediawrapper, MP4)): return self.updateTagsHelper(mediawrapper, tagPayload, self.formatFieldMap['mp4']) elif (isinstance(mediawrapper, OggVorbis)): return self.updateTagsHelper(mediawrapper, tagPayload, self.formatFieldMap['oggvorbis']) elif (isinstance(mediawrapper, FLAC)): return self.updateTagsHelper(mediawrapper, tagPayload, self.formatFieldMap['flac']) else: common.safeStdout( 'Skipping unknown/incompatible media file type [' + filename + ']') except Exception, err: common.safeStderr('Error seen during update processing: ' + str(err))
def extractMetadataHelper(self, mediawrapper, fieldMap, filename):
    '''
    Retrieves artist, album, and track data, forcing it to (lowercased) unicode.

    mediawrapper -- tag container, indexed by the format-specific field names
    fieldMap     -- maps the logical field names used here onto those format-specific names
    filename     -- only used in the log messages

    Returns {'artists': set, 'album': unicode, 'track': unicode}, or None (after logging) when
    no usable artist or no track title is present. A missing album becomes u'-unknown-'.
    '''
    def readField(logicalName):
        ''' Lowercased unicode of the first value held for the field, None if absent or empty '''
        key = fieldMap[logicalName]
        if key not in mediawrapper:
            return None
        rawValue = mediawrapper[key][0]
        if common.isempty(rawValue):
            return None
        return unicode(rawValue).lower()

    # artists: walk the fields in order of preference, stopping at the first hit unless
    # both artist fields were requested
    found = []
    for preferredField in self.artistFieldPref:
        name = readField(preferredField)
        if name is None:
            continue
        found.append(name)
        if not self.useBothArtistFields:
            break

    artists = set(found).difference(self.meaninglessArtists)
    if not artists:
        common.safeStderr('No artist info found for [' + filename + ']')
        return None

    # album
    album = readField('album')
    if album is None:
        album = u'-unknown-'

    # track
    track = readField('track')
    if track is None:
        common.safeStderr('No track title found for [' + filename + ']')
        return None

    return {'artists': artists, 'album': album, 'track': track}
def extractMetadata(self, filename): try: mediawrapper = self.getMediawrapper(filename) if (isinstance(mediawrapper, ID3)): return self.extractMetadataHelper(mediawrapper, self.formatFieldMap['id3'], filename) elif (isinstance(mediawrapper, MP4)): return self.extractMetadataHelper(mediawrapper, self.formatFieldMap['mp4'], filename) elif (isinstance(mediawrapper, OggVorbis)): return self.extractMetadataHelper( mediawrapper, self.formatFieldMap['oggvorbis'], filename) elif (isinstance(mediawrapper, FLAC)): return self.extractMetadataHelper(mediawrapper, self.formatFieldMap['flac'], filename) else: if (self.config.getboolean('verbose')): common.safeStdout( '\tSkipping unknown/incompatible media file type [' + filename + ']') except Exception, err: common.safeStderr('Error seen during media reading: ' + str(err))
def fetchTagStats(self, lastfm):
    '''
    Refresh the counts held in self.lastTagLibrary from LastFM's overall top-tag statistics.

    lastfm -- object whose fetchTopTagStats() supplies a tag -> count mapping

    Only tags present in that mapping are updated; the others keep their current value.
    If nothing could be fetched, every count in the library is reset to 0.
    '''
    topCounts = lastfm.fetchTopTagStats()

    if not topCounts:
        common.safeStderr('Could not retrieve tag counts from lastFM')
        for tagName in self.lastTagLibrary:
            self.lastTagLibrary[tagName] = 0
        return

    library = self.lastTagLibrary
    for tagName in library:
        if tagName in topCounts:
            library[tagName] = topCounts[tagName]
def fetchTagCount(self, tag, retries=3): ''' Return a count/weight, for the specified tag. The API no longer seems to return counts, though, so this is currently deprecated. ''' try: lastfm = pylast.get_lastfm_network(api_key=self.api_key, api_secret=self.api_secret) results = lastfm.search_for_tag(tag) if (results.get_total_result_count() > 0): topmatch = results.get_next_page()[0] if (topmatch is not None): if (self.useNamedTuple): return int(topmatch.weight) return int(topmatch['weight']) except Exception, err: if (retries > 0): common.safeStderr('Problem retrieving tag information, ' + str(retries) + ' retries left: ' + str(err)) time.sleep(5) return self.fetchTopTagStats(retries - 1) else: common.safeStderr('Problem retrieving tag information, ' + str(retries) + ' skipping: ' + str(err))
def fetchTopTagStats(self, retries=3): ''' LastFM provides a unified list tags/counts, for the top tags. By fetching these in one call, we can typically avoid a ton of unnecessary network calls for individual tags. ''' tags = {} try: lastfm = pylast.get_lastfm_network(api_key=self.api_key, api_secret=self.api_secret) lastTopTags = lastfm.get_top_tags(10000) for lastTopTag in lastTopTags: if (self.useNamedTuple): key = unicode(lastTopTag.item.name).lower() count = int(lastTopTag.weight) else: key = unicode(lastTopTag['item'].name).lower() count = int(lastTopTag['weight']) if (key in tags): common.safeStderr( 'Duplicate tag retrieved from lastFM, merging counts: ' + lastTopTag) tags[key] += count else: tags[key] = count return tags except Exception, err: if (retries > 0): common.safeStderr('Problem retrieving top tag information, ' + str(retries) + ' retries left: ' + str(err)) time.sleep(5) return self.fetchTopTagStats(retries - 1) else: common.safeStderr('Problem retrieving top tag information, ' + str(retries) + ' skipping: ' + str(err))
def fromXml(self, rootElement): numartists = 0 numalbums = 0 numtracks = 0 try: artistsElement = rootElement.find('artists') for artistElement in artistsElement.findall('artist'): nameElement = artistElement.find('name') if (nameElement is None): common.safeStderr('Missing name element on ['+artistElement.tag+']') continue artist = unicode(nameElement.text).lower() # tags = None means there is no tag info, tags = [] means we know it's an empty list artistTags = None artistTagElements = artistElement.findall('tag') if (artistTagElements is not None and len(artistTagElements) > 0): artistTags = [] for artistTagElement in artistTagElements: artistTags.append((unicode(artistTagElement.text), int(artistTagElement.get('weight')))) elif (artistElement.find('notags') is not None): artistTags = [] for albumElement in artistElement.findall('album'): nameElement = albumElement.find('name') if (nameElement is None): common.safeStderr('Missing name element on ['+albumElement.tag+']') continue album = unicode(nameElement.text).lower() for trackElement in albumElement.findall('track'): nameElement = trackElement.find('name') if (nameElement is None): common.safeStderr('Missing name element on ['+trackElement.tag+']') continue track = unicode(nameElement.text).lower() # tags = None means there is no tag info, tags = [] means we know it's an empty list trackTags = None trackTagElements = trackElement.findall('tag') if (trackTagElements is not None and len(trackTagElements) > 0): trackTags = [] for trackTagElement in trackTagElements: trackTags.append((unicode(trackTagElement.text), int(trackTagElement.get('weight')))) elif (trackElement.find('notags') is not None): trackTags = [] self.addToMediaLibrary(artist, album, track, artistTags, trackTags) numtracks += 1 numalbums += 1 numartists += 1 print 'Loaded ['+str(numartists)+'] artists, ['+str(numalbums)+'] albums, and ['+str(numtracks)+'] cached tracks' lastTagsElement = rootElement.find('lasttags') for lastTagElement in 
lastTagsElement.findall('tag'): self.addToLastFMTagLibrary(unicode(lastTagElement.text), int(lastTagElement.get('hits'))) except Exception, err: raise Exception('Could not deserialize the XML cache data, possibly corrupted: '+str(err)), None, sys.exc_info()[2]
def loadSynonyms(self): synfile = self.config.get('tagSynonymsFile') if (common.isempty(synfile)): return if (not os.path.exists(synfile) or not os.access(synfile, os.R_OK)): common.safeStderr('Synonyms file either does not exist or cannot be accessed ['+synfile+']') # Read the synonmyms file. The expected format is: # original token(tab)replacement token[,replacement token]... # e.g. # rnb rhythm and blues, r&b # This would replace any instance of 'rnb' seen in the LastFM tag set with both 'rhythm and blues' and 'r&b' # We preserve order, for the replacement values (so you can order them as you would like them to be replaced) for line in fileinput.input(synfile): # Allow inline comments if ('#' in line): line = line.split('#')[0] line = line.strip() if (common.isempty(line)): continue if (isinstance(line, str)): line = unicode(line, 'latin1') synline = line.split('\t') if (len(synline) < 2): common.safeStderr('Invalid synonym file line: '+line) continue original = synline[0].lower() replacements = map(string.strip, synline[1].split(',')) if ('-none-' in map(lambda val: val.lower(), replacements)): self.synonyms[original] = [] elif (original in self.synonyms): self.synonyms[original] = common.distinctSeq(self.synonyms[original] + replacements) else: self.synonyms[original] = common.distinctSeq(replacements) #for syn in sorted(self.synonyms): # common.safeStdout('Synonyms: '+ syn + ' :: '+ ', '.join(sorted(self.synonyms[syn]))) if (self.config.getboolean('verbose')): print 'Loaded ['+str(len(self.synonyms.keys()))+'] tag synonyms'
def updateTags(self): ''' This pushes the tags back into the underlying media files ''' verbose = self.config.getboolean('verbose') mediadir = self.config.get('mediaDir') startDelim = self.config.get('tagStartDelim') endDelim = self.config.get('tagEndDelim') artistTagFields = set(map(string.strip, self.config.get('artistTagFields').lower().split(','))) trackTagFields = set(map(string.strip, self.config.get('trackTagFields').lower().split(','))) touchedFields = artistTagFields.union(trackTagFields) skipExtensions = map(lambda x: '.'+x.lower().strip(), self.config.get('skipExtensions').split(',')) writeUntaggedArtist = (self.config.get('writeUntaggedTag').lower() == 'artist' or self.config.get('writeUntaggedTag').lower() == 'both') writeUntaggedTrack = (self.config.get('writeUntaggedTag').lower() == 'track' or self.config.get('writeUntaggedTag').lower() == 'both') if (touchedFields is None or len(touchedFields) == 0): common.safeStderr('Perhaps you should configure a destination field...') return self.loadSynonyms() self.generateLocalTags() common.safeStdout('Updating tags in ['+mediadir+']') numfiles = 0 for root, dirs, files in os.walk(mediadir): for filename in files: fname, ext = os.path.splitext(filename.lower()) if (ext is not None and ext in skipExtensions): continue metadata = self.mediaHelper.extractMetadata(os.path.join(root, filename)) if (metadata is None or len(metadata['artists']) == 0 or metadata['album'] is None or metadata['track'] is None): continue album, track = metadata['album'].lower(), metadata['track'].lower() artistTags = [] trackTags = [] for artist in map(string.lower, metadata['artists']): if (artist not in self.mediaLibrary or album not in self.mediaLibrary[artist]['albums'] or track not in self.mediaLibrary[artist]['albums'][album]['tracks']): common.safeStderr('Entry not found in library: ['+artist+']['+album+']['+track+']') continue artistTags.extend(self.mediaLibrary[artist]['tags'] or []) 
trackTags.extend(self.mediaLibrary[artist]['albums'][album]['tracks'][track]['tags'] or []) localArtistTags = self.lastTagsToLocalTags(artistTags) localTrackTags = self.lastTagsToLocalTags(trackTags) # Use untagged tags, if requested and appropriate if (len(localArtistTags) == 0 and writeUntaggedArtist): localArtistTags = [(u'untagged artist', 0)] if (len(localTrackTags) == 0 and writeUntaggedTrack): localTrackTags = [(u'untagged track', 0)] tagPayload = {} for touchedField in touchedFields: if (touchedField in artistTagFields and touchedField in trackTagFields): fieldTags = common.distinctTagSeq(localArtistTags + localTrackTags) elif (touchedField in artistTagFields): fieldTags = localArtistTags else: fieldTags = localTrackTags if (fieldTags is None or len(fieldTags) == 0) : continue # The following section is mostly to deal with multi-column sorting # Store the record weights somewhere we can look them up (the list should already be distinct) recordWeights = {} for tagpair in fieldTags: recordWeights[tagpair[0].lower()] = tagpair[1] # Pull out just the tag names as singleton tuples, we'll tack on sort weights next tagWeightsList = map(lambda tuple: (tuple[0],), fieldTags) # Pull out the list of sort rules (e.g. 
record, library) and append each appropriate weight to the tuple list, in succession sortRules = map(string.strip, self.config.get(touchedField + 'Sort').lower().split(',')) for sortRule in sortRules: if (sortRule == 'record'): tagWeightsList = map(lambda tagtuple: tagtuple + (recordWeights[tagtuple[0].lower()],), tagWeightsList) elif (sortRule == 'library'): tagWeightsList = map(lambda tagtuple: tagtuple + (self.getLibraryWeight(tagtuple[0].lower()),), tagWeightsList) elif (sortRule == 'popularity'):tagWeightsList = map(lambda tagtuple: tagtuple + (self.getPopularityWeight(tagtuple[0].lower()),), tagWeightsList) common.sortWeightedTagTuples(tagWeightsList) tagPayload[touchedField] = self.formattedTagList(tagWeightsList, startDelim, endDelim) if (self.mediaHelper.updateTags(os.path.join(root, filename), tagPayload)): numfiles += 1 if (verbose): common.safeStdout('\tUpdated: '+os.path.join(root, filename)) elif (verbose): common.safeStdout('\tSkipped: '+os.path.join(root, filename)+' (nothing to update)') print 'Updated ['+str(numfiles)+'] media files'