예제 #1
0
 def fetchArtistTags(self, artist, maxTagsToFetch, minWeight, retries=3):
     '''
     Retrieve the top tags for an artist from LastFM.

     At most maxTagsToFetch tags are requested; the raw result is handed to processSeenTags together
     with minWeight, and any tag whose name matches the artist's own name is then dropped.

     Returns a list of tag pairs (the tag name is the first item of each pair), an empty list when
     LastFM reports that there is no such artist, or None when every retry has failed.
     '''
     try:
         lastfm = pylast.get_lastfm_network(api_key=self.api_key,
                                            api_secret=self.api_secret)
         tags = self.processSeenTags(
             lastfm.get_artist(artist).get_top_tags(limit=maxTagsToFetch),
             minWeight)
         # Normalize the artist exactly like the tag names (lowercase, 'the' removed, stripped);
         # without the lowercasing a mixed-case artist name never matches its own tag
         artistKey = artist.lower().replace('the', '').strip()
         return [pair for pair in tags
                 if pair[0].lower().replace('the', '').strip() != artistKey]
     except Exception as err:
         # An unknown artist is a definitive answer, retrying will not help
         if ('no artist' in str(err).lower()): return []
         if (retries > 0):
             common.safeStderr(
                 'Problem retrieving artist tag information for [' +
                 artist + '], ' + str(retries) + ' retries left: ' +
                 str(err))
             time.sleep(5)
             return self.fetchArtistTags(artist, maxTagsToFetch, minWeight,
                                         retries - 1)
         common.safeStderr(
             'Problem retrieving artist tag information for [' +
             artist + '], skipping: ' + str(err))
예제 #2
0
    def updateTags(self, filename, tagPayload):
        '''
        Write the given tags into a single media file.

        tagPayload maps a bucket (field) name to a sequence of tag strings. Each sequence is cut to
        self.maxTags[bucket] entries and joined with self.tagSep before being handed to the
        format-specific helper. The caller's tagPayload is left untouched.

        Returns whatever the format helper returns, or None when the file type is not supported or
        an error was seen (errors are reported, not raised).
        '''
        try:
            mediawrapper = self.getMediawrapper(filename)

            # Build a fresh dict rather than overwriting the caller's lists with joined strings
            joinedPayload = {}
            for bucket in tagPayload:
                joinedPayload[bucket] = self.tagSep.join(
                    tagPayload[bucket][0:self.maxTags[bucket]])

            # Checked in order: media type, helper to use, key into self.formatFieldMap
            handlers = (
                (ID3, self.updateTagsHelperID3, 'id3'),
                (MP4, self.updateTagsHelper, 'mp4'),
                (OggVorbis, self.updateTagsHelper, 'oggvorbis'),
                (FLAC, self.updateTagsHelper, 'flac'),
            )
            for mediaType, helper, formatKey in handlers:
                if (isinstance(mediawrapper, mediaType)):
                    return helper(mediawrapper, joinedPayload,
                                  self.formatFieldMap[formatKey])

            common.safeStdout(
                'Skipping unknown/incompatible media file type [' +
                filename + ']')
        except Exception as err:
            common.safeStderr('Error seen during update processing: ' +
                              str(err))
예제 #3
0
    def extractMetadataHelper(self, mediawrapper, fieldMap, filename):
        '''
        Pull artist, album, and track names out of a media wrapper as lowercase unicode.

        fieldMap translates the logical field names (the entries of self.artistFieldPref, 'album',
        'track') into the keys used by this media format. Returns a dict with 'artists' (a set),
        'album' and 'track', or None when no usable artist or no track title is present.
        '''
        def firstValue(fieldName):
            # First value stored under the mapped key, lowercased; None if absent or empty
            key = fieldMap[fieldName]
            if (key not in mediawrapper):
                return None
            rawValue = mediawrapper[key][0]
            if (common.isempty(rawValue)):
                return None
            return unicode(rawValue).lower()

        # artists: walk the preferred fields in order
        found = []
        for artistField in self.artistFieldPref:
            key = fieldMap[artistField]
            if (key not in mediawrapper):
                continue
            candidate = mediawrapper[key][0]
            if (not common.isempty(candidate)):
                found.append(unicode(candidate).lower())
            # Stop at the first field that is present, unless both fields are wanted
            if (not self.useBothArtistFields):
                break
        artists = set(found).difference(self.meaninglessArtists)
        if (len(artists) == 0):
            common.safeStderr('No artist info found for [' + filename + ']')
            return None

        # album: optional, falls back to a placeholder
        album = firstValue('album')
        if (album is None):
            album = u'-unknown-'

        # track: mandatory
        track = firstValue('track')
        if (track is None):
            common.safeStderr('No track title found for [' + filename + ']')
            return None

        return {'artists': artists, 'album': album, 'track': track}
예제 #4
0
    def extractMetadata(self, filename):
        '''
        Read artist/album/track metadata from a media file.

        Picks the field map that matches the wrapper type returned by getMediawrapper and delegates
        to extractMetadataHelper. Returns the helper's result, or None for unsupported file types
        and when an error was seen (errors are reported, not raised).
        '''
        try:
            mediawrapper = self.getMediawrapper(filename)

            # Checked in order: media type, key into self.formatFieldMap
            knownFormats = (
                (ID3, 'id3'),
                (MP4, 'mp4'),
                (OggVorbis, 'oggvorbis'),
                (FLAC, 'flac'),
            )
            for mediaType, formatKey in knownFormats:
                if (isinstance(mediawrapper, mediaType)):
                    return self.extractMetadataHelper(
                        mediawrapper, self.formatFieldMap[formatKey], filename)

            if (self.config.getboolean('verbose')):
                common.safeStdout(
                    '\tSkipping unknown/incompatible media file type [' +
                    filename + ']')
        except Exception as err:
            common.safeStderr('Error seen during media reading: ' + str(err))
예제 #5
0
 def fetchTagStats(self, lastfm):
     '''
     Refresh the counts in self.lastTagLibrary from the LastFM-wide top tag statistics.

     Tags present in the fetched statistics get the fetched count; tags that are not listed keep
     their current value. If no statistics could be fetched at all, every count is reset to 0.
     '''
     stats = lastfm.fetchTopTagStats()
     if (stats is not None and len(stats) != 0):
         for tagName in self.lastTagLibrary:
             if (tagName in stats):
                 self.lastTagLibrary[tagName] = stats[tagName]
         return

     # Nothing came back: report it and zero out every known tag
     common.safeStderr('Could not retrieve tag counts from lastFM')
     for tagName in self.lastTagLibrary:
         self.lastTagLibrary[tagName] = 0
예제 #6
0
 def fetchTagCount(self, tag, retries=3):
     '''
     Return the count/weight of the best LastFM search match for the specified tag.

     The API no longer seems to return counts, so this is currently deprecated.

     Returns the weight as an int, or None when the search has no results, the top match is
     missing, or every retry has failed.
     '''
     try:
         lastfm = pylast.get_lastfm_network(api_key=self.api_key,
                                            api_secret=self.api_secret)
         results = lastfm.search_for_tag(tag)
         if (results.get_total_result_count() > 0):
             topmatch = results.get_next_page()[0]
             if (topmatch is not None):
                 # Results are either attribute-style (named tuple) or dict-style
                 if (self.useNamedTuple):
                     return int(topmatch.weight)
                 return int(topmatch['weight'])
     except Exception as err:
         if (retries > 0):
             common.safeStderr('Problem retrieving tag information, ' +
                               str(retries) + ' retries left: ' + str(err))
             time.sleep(5)
             # Retry THIS lookup; the previous code called fetchTopTagStats here, which dropped
             # the tag and returned a dict of all top tags instead of a single count
             return self.fetchTagCount(tag, retries - 1)
         else:
             common.safeStderr('Problem retrieving tag information, ' +
                               str(retries) + ' skipping: ' + str(err))
예제 #7
0
 def fetchTopTagStats(self, retries=3):
     '''
     LastFM provides a unified list of tags/counts for the top tags. By fetching these in one call,
     we can typically avoid a ton of unnecessary network calls for individual tags.

     Returns a dict mapping the lowercased tag name to its count (counts of duplicate names are
     added together), or None when every retry has failed.
     '''
     tags = {}
     try:
         lastfm = pylast.get_lastfm_network(api_key=self.api_key,
                                            api_secret=self.api_secret)
         for lastTopTag in lastfm.get_top_tags(10000):
             # Results are either attribute-style (named tuple) or dict-style
             if (self.useNamedTuple):
                 key = unicode(lastTopTag.item.name).lower()
                 count = int(lastTopTag.weight)
             else:
                 key = unicode(lastTopTag['item'].name).lower()
                 count = int(lastTopTag['weight'])
             if (key in tags):
                 # Log the tag name, not the result object: concatenating the object itself onto
                 # the message raised a TypeError, which turned every duplicate into a failed fetch
                 common.safeStderr(
                     'Duplicate tag retrieved from lastFM, merging counts: '
                     + key)
                 tags[key] += count
             else:
                 tags[key] = count
         return tags
     except Exception as err:
         if (retries > 0):
             common.safeStderr('Problem retrieving top tag information, ' +
                               str(retries) + ' retries left: ' + str(err))
             time.sleep(5)
             return self.fetchTopTagStats(retries - 1)
         else:
             common.safeStderr('Problem retrieving top tag information, ' +
                               str(retries) + ' skipping: ' + str(err))
예제 #8
0
    def fromXml(self, rootElement):
        numartists = 0
        numalbums = 0
        numtracks = 0
        try:
            artistsElement = rootElement.find('artists')
            for artistElement in artistsElement.findall('artist'):
                nameElement = artistElement.find('name')
                if (nameElement is None):
                    common.safeStderr('Missing name element on ['+artistElement.tag+']')
                    continue                
                artist = unicode(nameElement.text).lower()
                
                # tags = None means there is no tag info, tags = [] means we know it's an empty list
                artistTags = None
                artistTagElements = artistElement.findall('tag')
                if (artistTagElements is not None and len(artistTagElements) > 0):
                    artistTags = []
                    for artistTagElement in artistTagElements:
                        artistTags.append((unicode(artistTagElement.text), int(artistTagElement.get('weight'))))
                elif (artistElement.find('notags') is not None):
                    artistTags = []
        
                for albumElement in artistElement.findall('album'):
                    nameElement = albumElement.find('name')
                    if (nameElement is None):
                        common.safeStderr('Missing name element on ['+albumElement.tag+']')
                        continue
                    album = unicode(nameElement.text).lower()
                    
                    for trackElement in albumElement.findall('track'):
                        nameElement = trackElement.find('name')
                        if (nameElement is None):
                            common.safeStderr('Missing name element on ['+trackElement.tag+']')
                            continue
                        track = unicode(nameElement.text).lower()
                            
                        # tags = None means there is no tag info, tags = [] means we know it's an empty list
                        trackTags = None
                        trackTagElements = trackElement.findall('tag')
                        if (trackTagElements is not None and len(trackTagElements) > 0):
                            trackTags = []
                            for trackTagElement in trackTagElements:
                                trackTags.append((unicode(trackTagElement.text), int(trackTagElement.get('weight'))))
                        elif (trackElement.find('notags') is not None):
                            trackTags = []

                        self.addToMediaLibrary(artist, album, track, artistTags, trackTags)
                        
                        numtracks += 1
                    numalbums += 1
                numartists += 1
            print 'Loaded ['+str(numartists)+'] artists, ['+str(numalbums)+'] albums, and ['+str(numtracks)+'] cached tracks'
            
            lastTagsElement = rootElement.find('lasttags')
            for lastTagElement in lastTagsElement.findall('tag'):
                self.addToLastFMTagLibrary(unicode(lastTagElement.text), int(lastTagElement.get('hits')))
        except Exception, err:
            raise Exception('Could not deserialize the XML cache data, possibly corrupted: '+str(err)), None, sys.exc_info()[2]
예제 #9
0
 def loadSynonyms(self):
     '''
     Load the tag synonyms file named by the 'tagSynonymsFile' option into self.synonyms.

     Nothing is loaded when the option is empty. A file that does not exist or cannot be read is
     reported and skipped. Lines without a tab separator are reported and skipped.
     '''
     synfile = self.config.get('tagSynonymsFile')
     if (common.isempty(synfile)):
         return
     if (not os.path.exists(synfile) or not os.access(synfile, os.R_OK)):
         common.safeStderr('Synonyms file either does not exist or cannot be accessed ['+synfile+']')
         # Stop here: trying to read the file anyway would only raise
         return

     # Read the synonyms file. The expected format is:
     # original token(tab)replacement token[,replacement token]...
     # e.g.
     # rnb    rhythm and blues, r&b
     # This would replace any instance of 'rnb' seen in the LastFM tag set with both 'rhythm and blues' and 'r&b'
     # We preserve order, for the replacement values (so you can order them as you would like them to be replaced)
     synlines = fileinput.input(synfile)
     try:
         for line in synlines:
             # Allow inline comments
             if ('#' in line):
                 line = line.split('#')[0]
             line = line.strip()
             if (common.isempty(line)):
                 continue
             if (isinstance(line, str)):
                 line = unicode(line, 'latin1')
             synline = line.split('\t')
             if (len(synline) < 2):
                 common.safeStderr('Invalid synonym file line: '+line)
                 continue
             original = synline[0].lower()
             replacements = [token.strip() for token in synline[1].split(',')]
             if (any(token.lower() == '-none-' for token in replacements)):
                 # '-none-' maps the original token to no replacements at all
                 self.synonyms[original] = []
             elif (original in self.synonyms):
                 self.synonyms[original] = common.distinctSeq(self.synonyms[original] + replacements)
             else:
                 self.synonyms[original] = common.distinctSeq(replacements)
     finally:
         # Release the file even if a line fails to parse
         synlines.close()
     if (self.config.getboolean('verbose')):
         print('Loaded ['+str(len(self.synonyms.keys()))+'] tag synonyms')
예제 #10
0
    def updateTags(self):
        '''
        This pushes the tags back into the underlying media files.

        Walks the directory named by the 'mediaDir' option, looks every readable media file up in
        self.mediaLibrary, converts its artist and track tags to local tags, sorts them per
        destination field according to the '<field>Sort' option, and writes the result through
        self.mediaHelper.updateTags. Files whose extension is listed in 'skipExtensions' and files
        without usable metadata are skipped. Nothing is done when no destination field is configured.
        '''
        def configNames(option):
            # ''.split(',') yields [''], so blank entries are dropped explicitly; otherwise an unset
            # option registers an empty field name and the "nothing configured" guard never fires
            rawNames = self.config.get(option).lower().split(',')
            return set(name.strip() for name in rawNames if name.strip())

        verbose = self.config.getboolean('verbose')
        mediadir = self.config.get('mediaDir')
        startDelim = self.config.get('tagStartDelim')
        endDelim = self.config.get('tagEndDelim')
        artistTagFields = configNames('artistTagFields')
        trackTagFields = configNames('trackTagFields')
        touchedFields = artistTagFields.union(trackTagFields)
        skipExtensions = set('.' + extension.lower().strip()
                             for extension in self.config.get('skipExtensions').split(','))
        writeUntagged = self.config.get('writeUntaggedTag').lower()
        writeUntaggedArtist = writeUntagged in ('artist', 'both')
        writeUntaggedTrack = writeUntagged in ('track', 'both')

        if (len(touchedFields) == 0):
            common.safeStderr('Perhaps you should configure a destination field...')
            return

        self.loadSynonyms()
        self.generateLocalTags()

        common.safeStdout('Updating tags in ['+mediadir+']')
        numfiles = 0
        for root, dirs, files in os.walk(mediadir):
            for filename in files:
                ext = os.path.splitext(filename.lower())[1]
                if (ext in skipExtensions):
                    continue

                filepath = os.path.join(root, filename)
                metadata = self.mediaHelper.extractMetadata(filepath)
                if (metadata is None or len(metadata['artists']) == 0 or metadata['album'] is None or metadata['track'] is None):
                    continue
                album, track = metadata['album'].lower(), metadata['track'].lower()

                # Collect the cached LastFM tags of every artist credited on the file
                artistTags = []
                trackTags = []
                for artist in [name.lower() for name in metadata['artists']]:
                    if (artist not in self.mediaLibrary or
                        album not in self.mediaLibrary[artist]['albums'] or
                        track not in self.mediaLibrary[artist]['albums'][album]['tracks']):
                        common.safeStderr('Entry not found in library: ['+artist+']['+album+']['+track+']')
                        continue
                    # A cached value of None means "no tag info", treat it like an empty list
                    artistTags.extend(self.mediaLibrary[artist]['tags'] or [])
                    trackTags.extend(self.mediaLibrary[artist]['albums'][album]['tracks'][track]['tags'] or [])

                localArtistTags = self.lastTagsToLocalTags(artistTags)
                localTrackTags = self.lastTagsToLocalTags(trackTags)

                # Use untagged tags, if requested and appropriate
                if (len(localArtistTags) == 0 and writeUntaggedArtist):
                    localArtistTags = [(u'untagged artist', 0)]
                if (len(localTrackTags) == 0 and writeUntaggedTrack):
                    localTrackTags = [(u'untagged track', 0)]

                tagPayload = {}
                for touchedField in touchedFields:
                    if (touchedField in artistTagFields and touchedField in trackTagFields):
                        fieldTags = common.distinctTagSeq(localArtistTags + localTrackTags)
                    elif (touchedField in artistTagFields):
                        fieldTags = localArtistTags
                    else:
                        fieldTags = localTrackTags

                    if (fieldTags is None or len(fieldTags) == 0):
                        continue

                    # The following section is mostly to deal with multi-column sorting

                    # Store the record weights somewhere we can look them up (the list should already be distinct)
                    recordWeights = {}
                    for tagpair in fieldTags:
                        recordWeights[tagpair[0].lower()] = tagpair[1]

                    # One weight source per supported sort rule; each takes the lowercased tag name
                    weightSources = {
                        'record': recordWeights.__getitem__,
                        'library': self.getLibraryWeight,
                        'popularity': self.getPopularityWeight,
                    }

                    # Pull out just the tag names as singleton tuples, we'll tack on sort weights next
                    tagWeightsList = [(tagpair[0],) for tagpair in fieldTags]

                    # Append one weight per configured sort rule, in rule order; unknown rules are ignored
                    sortRules = [rule.strip() for rule in self.config.get(touchedField + 'Sort').lower().split(',')]
                    for sortRule in sortRules:
                        weightOf = weightSources.get(sortRule)
                        if (weightOf is None):
                            continue
                        tagWeightsList = [tagtuple + (weightOf(tagtuple[0].lower()),) for tagtuple in tagWeightsList]

                    common.sortWeightedTagTuples(tagWeightsList)

                    tagPayload[touchedField] = self.formattedTagList(tagWeightsList, startDelim, endDelim)

                if (self.mediaHelper.updateTags(filepath, tagPayload)):
                    numfiles += 1
                    if (verbose):
                        common.safeStdout('\tUpdated: '+filepath)
                elif (verbose):
                    common.safeStdout('\tSkipped: '+filepath+' (nothing to update)')
        print('Updated ['+str(numfiles)+'] media files')