def findSong(self, song):
    """Return a library song whose audio matches `song`, or None.

    Lookup order:
      1. A song stored under the same key (self.files) whose audio compares
         equal via song.compareAudio(); a same-key song with different audio
         only produces a warning.
      2. A song already indexed by audio MD5 (self.audioMd5s).
      3. The not-yet-hashed songs of the same size (self.sizes[size]); each
         one popped from that list is hashed and added to self.audioMd5s, so
         the size list shrinks as MD5s are generated.

    Audio MD5s are only computed when some library song has the same size.
    """
    if song.key in self.files:
        other = self.files[song.key]
        if song.compareAudio(other):
            return other
        warn("File %s has the same name as %s but has different audio" % (song.getPath(), other.getPath()))

    size = song.getSize()
    if size not in self.sizes:
        return None

    audioMd5 = song.getAudioMd5()
    if audioMd5 in self.audioMd5s:
        # Fixed: this used to read self.audioMd5, which is not the MD5 index
        return self.audioMd5s[audioMd5]

    # Hash same-sized candidates lazily, moving each into the MD5 index
    sizeList = self.sizes[size]
    while sizeList:
        other = sizeList.pop()
        otherAudioMd5 = other.getAudioMd5()
        self.audioMd5s[otherAudioMd5] = other
        if audioMd5 == otherAudioMd5:
            return other
    return None
def copy(self, toPath):
    """Copy this file to `toPath`, creating the destination directory if needed.

    The file contents are copied first; copying the stat information
    (shutil.copystat) is best-effort and only produces a warning when it
    fails with OSError.
    """
    toDir = os.path.dirname(toPath)
    # dirname() is "" for a bare file name, and os.makedirs("") would raise
    if toDir and not os.path.exists(toDir):
        os.makedirs(toDir)
    fromPath = self.getPath()
    shutil.copyfile(fromPath, toPath)
    try:
        shutil.copystat(fromPath, toPath)
    except OSError:
        warn("Unable to set the modification time", file=toPath)
def readFile(self):
    """Read the MP3 file once and compute self.md5 and self.audioMd5.

    Returns immediately when self.md5 is already set. Otherwise the stream
    is opened, every leading "ID3" header is handed to readID3v2Tag(), and
    the first 0xFF 0xFB sync bytes are located (a warning reports how many
    bytes were skipped to find them). The sync header and everything after
    it are fed to both self.md5 and self.audioMd5.

    The stream is closed on every exit path, including errors.

    Raises:
        Mp3FileError: when EOF is reached before any content or before the
            sync bytes are found.
    """
    if self.md5:
        return
    self.open()
    try:
        self.md5 = md5.new()
        while True:
            # NOTE(review): the {'eof'} option presumably lets read() return
            # a short/empty string at end of file instead of raising
            header = self.read(3, "header tag", {'eof'})
            if len(header) == 0:    # EOF
                raise Mp3FileError(self.filePath, self.stream.tell(), "EOF without any MP3 content")
            if header == "ID3":
                self.readID3v2Tag(header)
                continue

            # Check for sync bytes, sliding a two byte window forward one
            # byte at a time until they are found
            if header[:2] != "\xFF\xFB":
                discarded = 1
                header = header[1:]
                while header != "\xFF\xFB":
                    discarded += 1
                    # Slice (not index) so a short window at EOF yields the
                    # Mp3FileError below instead of an IndexError
                    header = header[1:] + self.read(1, "sync byte", {'eof'})
                    if len(header) < 2:
                        raise Mp3FileError(self.filePath, self.stream.tell(), "EOF searching for MP3 sync bytes")
                warn("Found sync bytes after skipping %d bytes\n" % (discarded), self.filePath, self.stream.tell())

            self.md5.update(header)
            self.audioMd5 = md5.new(header)
            remainder = self.stream.read()
            self.md5.update(remainder)
            self.audioMd5.update(remainder)
            break
    finally:
        # Previously the stream stayed open whenever an error was raised
        self.close()
def readID3v2Frame(self):
    """Read one ID3v2 frame from the current tag and record it in self.frames.

    The 10 byte frame header is read first. A header starting with a zero
    byte is treated as padding: the rest is checked with expectPadding() and
    the method returns None. Otherwise the frame id (bytes 0-3), body length
    (bytes 4-7) and the flag byte at offset 9 are decoded and the body is
    read.

    Frames whose id starts with "T", and "IPLS", are decoded as text using
    the encoding named by the first body byte; every other frame is stored
    as its raw body. A TRCK frame additionally sets self.track from the part
    before any "/".

    Returns:
        None  - padding was found instead of a frame.
        False - the body could not be read (the rest of the tag is skipped),
                or TRCK was present but empty.
        True  - the frame was stored, or was ignored because its
                compression or encryption flag is set.
    """
    frameHeader = self.readFromTag(10, "ID3v2 frame header")
    if ord(frameHeader[:1]) == 0:
        self.expectPadding(frameHeader[1:])
        return

    frameId = frameHeader[:4]
    frameBodyLength = bigEndianInteger(frameHeader[4:8])
    frameFlags = bigEndianInteger(frameHeader[9:10])

    if frameFlags & 0x20:
        self.readFromTag(1, "ID3v2 frame group identifier byte")

    try:
        frameBody = self.readFromTag(frameBodyLength, "ID3v2 '%s' frame body" % (frameId))
    except Mp3FileError as error:
        if getOption('warning'):
            sys.stderr.write("warning: " + str(error) + ": skipping remainder of ID3v2 tag\n")
        return False

    # expect() returns the actual value, so a set flag is truthy here
    if (self.expect("ignoring '%s' frame: compression flag" % (frameId), frameFlags & 0x80, 0) or
            self.expect("ignoring '%s' frame: encryption flag" % (frameId), frameFlags & 0x40, 0)):
        return True

    if frameId[0] == "T" or frameId == "IPLS":
        if frameBody:
            encoding = ord(frameBody[0])
            self.requireMember("encoding descriptor for %s frame" % (frameId), encoding, {0, 1, 2, 3})
            value = frameBody[1:].decode(id3v2EncodingToPythonEncoding[encoding], errors="strict")
        else:
            # A zero length body has no encoding byte; treat it as empty
            # text instead of failing with IndexError on frameBody[0]
            warn("'%s' frame has an empty body" % (frameId), self.filePath, self.stream.tell())
            value = u""

        if frameId == "TRCK":
            if value == "":
                warn("TRCK tag is present but empty (ignoring it)", self.filePath, self.stream.tell())
                return False
            try:
                self.track = int(value.split("/")[0])
            except ValueError:
                # Keep the raw text in self.frames but leave self.track alone
                warn("TRCK tag %r is not numeric (track number not set)" % (value,),
                     self.filePath, self.stream.tell())
    else:
        value = frameBody

    self.frames[frameId] = value
    return True
def createAudioFile(filePath, rootPath=None):
    """Build the file object matching the extension of `filePath`.

    Returns an Mp3File for "mp3", an AudioFile for any other extension in
    audioExtensions, and None otherwise (no extension matched, or the
    extension is not an audio one). The first time an unknown extension is
    seen a warning is issued and the extension is added to
    ignoredExtensions so it is not reported again.
    """
    extensionMatch = extensionPattern.match(filePath)
    if not extensionMatch:
        return None

    extension = extensionMatch.group(1).lower()
    if extension == "mp3":
        return Mp3File(filePath, rootPath)
    elif extension in audioExtensions:
        return AudioFile(filePath, rootPath)
    elif extension not in ignoredExtensions:
        # Report each unknown extension once, then remember it
        message = "muse.createAudioFile: warning: Ignoring unknown file extension '%s' in file %s" % (extension, filePath)
        warn(message)
        ignoredExtensions.add(extension)
    return None
def expectMember(self, description, actual, expectedSet):
    """Warn when `actual` is not a member of `expectedSet`; return `actual`.

    The warning names `description` and is reported against self.filePath
    at the current stream position. The value is returned unchanged either
    way.
    """
    isMember = actual in expectedSet
    if not isMember:
        message = "%s was %s but required to be in %s" % (description, actual, expectedSet)
        warn(message, self.filePath, self.stream.tell())
    return actual
def expect(self, description, actual, expected):
    """Warn when `actual` differs from `expected`; return `actual`.

    The warning names `description` and is reported against self.filePath
    at the current stream position. The value is returned unchanged either
    way, so callers can test the result directly.
    """
    differs = actual != expected
    if differs:
        message = "%s was %s but expected %s" % (description, actual, expected)
        warn(message, self.filePath, self.stream.tell())
    return actual
def __init__(self, library, subdir = ".", artistRe = None, albumRe = None, titleRe = None):
    """Index every audio file found under `subdir` of the `library` directory.

    The process changes into `library` while walking (to keep paths short)
    and always changes back to the original working directory afterwards,
    even if the walk raises.

    For each file accepted by createAudioFile():
      * files whose audio matches a song already indexed (findSong) are
        skipped with a warning;
      * files whose lower-cased name is already used as a key are skipped
        with a warning;
      * the rest are indexed by key (self.files), by size (self.sizes) and
        by artist / album / title (self.artists).
    MP3 files raising Mp3FileError are skipped with a warning.

    Args:
        library: path of the library's root directory.
        subdir: directory to walk, relative to `library`.
        artistRe, albumRe, titleRe: accepted but not used here.
    """
    self.files = {}                 # Songs indexed by filename
    self.audioMd5s = {}             # Songs indexed by MD5
    self.sizes = {}                 # Lists of songs, indexed by size (only until audio MD5s are generated)
    self.artists = {}
    self.artistMaxLen = len("Artist or Group")
    self.albumMaxLen = len("Album")
    self.titleMaxLen = len("Song")
    self.library = os.path.abspath(library)     # Pass this single reference to all songs to conserve memory
    curDir = os.getcwd()
    os.chdir(library)               # Keep paths short

    try:
        for dirPath, subDirs, dirFiles in os.walk(subdir):
            for fileName in dirFiles:
                filePath = dirPath + "/" + fileName

                try:
                    audioFile = createAudioFile(filePath, rootPath=library)
                    if not audioFile:
                        continue

                    key = fileName.lower()  # The lower case file name must be a unique key in the library
                    audioFile.key = key

                    # NOTE: an earlier version only ran this check in 'fix'
                    # mode because it slows loading a lot
                    other = self.findSong(audioFile)
                    if other:
                        warn("Ignoring file %s because its audio is identical to %s" % (filePath, other.filePath))
                        continue

                    if key in self.files:
                        warn("Ignoring file %s because it's name duplicates %s" % (filePath, self.files[key].filePath))
                        continue

                    self.files[key] = audioFile
                    size = audioFile.getSize()
                    self.sizes.setdefault(size, []).append(audioFile)

                    artist = audioFile.artist if audioFile.artist else "Unknown"
                    album = audioFile.album if audioFile.album else "Unknown"
                    title = audioFile.title

                    # Column widths are measured on the key actually stored,
                    # so a missing (None) artist or album no longer raises
                    # TypeError in len()
                    if artist not in self.artists:
                        self.artists[artist] = {'artist': audioFile.artist, 'albums': {}}
                        self.artistMaxLen = max(self.artistMaxLen, len(artist))

                    albums = self.artists[artist]['albums']
                    if album not in albums:
                        albums[album] = {'album': audioFile.album, 'songs': {}}
                        self.albumMaxLen = max(self.albumMaxLen, len(album))

                    songs = albums[album]['songs']
                    if title not in songs:
                        songs[title] = audioFile
                        self.titleMaxLen = max(self.titleMaxLen, len(title) if title else 0)

                except Mp3FileError as error:
                    warn("Skipping invalid MP3 file (error %s)" % str(error), filePath)
                    continue
    finally:
        os.chdir(curDir)            # Restore the path for the caller
def __init__(self, filePath, rootPath=None):
    """Derive artist, album, track and title for an audio file from its path.

    The path is split with AudioFile.splitPattern into directory, file name
    and extension; the directory is parsed with AudioFile.dirPattern and the
    file name with AudioFile.filePattern. Directory and file name values are
    then combined with reconcileStrings() into self.artist and self.album,
    with mismatches reported through warn(), info() or error().

    When the "fix" option is on, the canonical path built from the results
    differs from `filePath`, and takeAction() approves, the file is renamed
    and its times are reset with os.utime().

    Raises:
        AudioFileError: when the path, directory name or file name does not
            match its pattern.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation was lost - confirm against the original file.
    """
    MuseFile.__init__(self, filePath, rootPath)
    self.artist = None
    self.album = None
    self.md5 = None
    self.frames = {}

    # Determine meta-data from file path
    match = AudioFile.splitPattern.match(filePath)
    if not match:
        raise AudioFileError(filePath, "Failed to parse file path")

    (dirName, fileName, fileExt) = match.group(1, 2, 3)
    match = AudioFile.dirPattern.match(dirName)
    if not match:
        raise AudioFileError(filePath, "Failed to parse directory name " + dirName)

    (dirLetter, dirArtist, dirAlbum) = match.group(1, 2, 3)

    # Only group 3 matched: it is not an album. It is taken as the artist
    # when a letter was already parsed or it is longer than one character,
    # otherwise as the letter itself.
    if not match.group(2) and match.group(3):
        dirAlbum = None
        if dirLetter or len(match.group(3)) > 1:
            dirArtist = match.group(3)
        else:
            dirLetter = match.group(3)

    #print "Directory: Letter %s Artist %s Album %s" % (dirLetter, dirArtist, dirAlbum)

    match = AudioFile.filePattern.match(fileName)
    if not match:
        raise AudioFileError(filePath, "Failed to parse file name " + fileName)

    (fileArtist, fileAlbum, self.track, self.title) = match.group(1, 2, 3, 4)
    #print ("File name: Letter %s Artist %s Album %s Track %s Song %s Ext %s" %
    #       (dirLetter, fileArtist, fileAlbum, self.track, self.title, fileExt))

    # NOTE(review): reconcileStrings() presumably returns the default when
    # nothing is known and None when its arguments conflict - confirm
    self.artist = reconcileStrings(dirArtist, fileArtist, default="Unknown")
    if self.artist == "Unknown":
        warn("Failed to determine an artist from the filepath", filePath)
    elif self.artist == None:
        # Fall back to the directory's artist, then try to explain the file
        # name's "artist" field some other way before reporting a mismatch
        self.artist = dirArtist
        match = AudioFile.numericPattern.match(fileArtist)
        if match and self.track == None:
            # The field matched numericPattern: group 1 becomes the track
            self.track = match.group(1)
            fileArtist = match.group(2)
            fileAlbum = None
        if fileArtist:
            match = AudioFile.artistAlbumPattern.match(fileArtist)
            if match and reconcileStrings(dirArtist, match.group(1)) and reconcileStrings(dirAlbum, match.group(2)):
                fileAlbum = match.group(2)
            else:
                match = AudioFile.withArtistPattern.match(fileArtist)
                if (not (match and (reconcileStrings(match.group(1), dirArtist) or reconcileStrings(match.group(2), dirArtist))) and
                    not simpleString(dirArtist) == simpleString(fileArtist).replace("_", " ")):
                    error("Directory artist '%s' differs from file name artist '%s'" % (dirArtist, fileArtist), filePath)

    self.album = reconcileStrings(dirAlbum, fileAlbum, default="Unknown")
    if self.album == "Unknown":
        info("Failed to determine an album from the filepath", filePath)     # Too common to warn on
    elif self.album == None:
        self.album = dirAlbum
        if fileArtist.isdigit() and reconcileStrings(fileAlbum, dirArtist):
            # The file name's artist field is all digits and its album field
            # agrees with the directory artist: the digits are the track
            self.track = fileArtist
        elif not reconcileStrings(fileAlbum, dirAlbum):
            match = AudioFile.yearAlbumPattern.match(dirAlbum)
            if match and reconcileStrings(fileAlbum, match.group(2)):
                # NOTE(review): self.year is only assigned on this path
                self.year = match.group(1)
                self.album = match.group(2)
            else:
                error("Directory album '%s' differs from file name album '%s'" % (dirAlbum, fileAlbum), filePath)

    # Canonical path: "<dir>/<artist> - <album> - <track> - <title>.<ext>"
    # NOTE(review): safeAppend() presumably returns "" for a missing or
    # suppressed value - confirm
    newPath = ("%s/%s%s%s%s.%s" %
               (dirName, safeAppend(self.artist, " - ", suppress="Unknown"), safeAppend(self.album, " - ", suppress="Unknown"),
                safeAppend(self.track, " - "), self.title, fileExt))

    if getOption("fix", default=False) and newPath != filePath and takeAction("rename %s to %s" % (filePath, newPath)):
        os.rename(filePath, newPath)
        os.utime(newPath, None)
def reconcile(self):
    """Compare directory, file name and ID3 tag metadata and report conflicts.

    The file is read first (readFile) so self.frames is populated. Artist,
    album and title are each combined with reconcileStrings(); when a
    result is None a warning is issued ("Can't identify ..." if every
    source is None, "Conflicting ..." otherwise) and the method returns.

    With a directory artist present, the remaining checks compare it with
    the file name artist and the TPE1 tag, printing any difference and
    offering (through takeAction) to rename the file when only the spelling
    of the file name artist differs.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation was lost - confirm against the original file.
    """
    self.readFile()
    tagArtist = self.frames.get('TPE1')
    tagAlbum = self.frames.get('TALB')
    tagTitle = self.frames.get('TIT2')
    simpleArtist = reconcileStrings(self.dirArtist, self.fileArtist, tagArtist)
    simpleAlbum = reconcileStrings(self.dirAlbum, self.fileAlbum, tagAlbum)
    simpleTitle = reconcileStrings(self.fileTitle, tagTitle)

    if simpleArtist is None:
        if (self.dirArtist, self.fileArtist, tagArtist) == (None, None, None):
            warn("Can't identify an artist", self.getPath())
        else:
            warn("Conflicting artists (tag %s)" % (tagArtist), self.getPath())
        return

    if simpleAlbum is None:
        if (self.dirAlbum, self.fileAlbum, tagAlbum) == (None, None, None):
            warn("Can't identify an album", self.getPath())
        else:
            warn("Conflicting albums (tag %s)" % (tagAlbum), self.getPath())
        return

    if simpleTitle is None:
        # Fixed: this pair used to be compared with a 3-tuple, which can
        # never be equal, so the "Can't identify" warning was unreachable
        if (self.fileTitle, tagTitle) == (None, None):
            warn("Can't identify a title", self.getPath())
        else:
            warn("Conflicting titles (tag %s)" % (tagTitle), self.getPath())
        return

    if not self.dirArtist:
        return

    if self.dirArtist == self.fileArtist:
        if tagArtist != self.fileArtist:
            print("%s: artist tag %s differs from directory/file artist %s" % (self.getPath(), tagArtist, self.fileArtist))
    elif self.dirArtist == tagArtist:
        if simpleString(self.dirArtist) == simpleString(self.fileArtist):
            # Same artist, different spelling: offer to rename the file
            fromPath = self.getPath()
            toPath = fromPath.replace(self.fileArtist, self.dirArtist)
            if takeAction("rename file %s to %s to match directory/tagged artist %s" %
                          (fromPath, toPath, self.dirArtist)):
                os.rename(fromPath, toPath)
        elif self.fileArtist is None:
            if self.fileName.endswith(tagTitle):
                head = self.fileName[:-len(tagTitle)].strip()
                match = self.trackNumberPattern.match(head)
                if match:
                    if int(match.group(1)) == self.track:
                        # NOTE(review): unfinished in the original. It built
                        # an unused template
                        # ("%s/%s - %d - %s.self.dirName") and then reused
                        # the rename from the branch above; self.fileArtist
                        # is None here, so replace() raises TypeError.
                        # Decide the intended target name before relying on
                        # this path.
                        fromPath = self.getPath()
                        toPath = fromPath.replace(self.fileArtist, self.dirArtist)
                        if takeAction("rename file %s to %s to match directory/tagged artist %s" %
                                      (fromPath, toPath, self.dirArtist)):
                            os.rename(fromPath, toPath)
                print("'" + head + "'")
                print("%s: filename ends with title %s (no file artist; directory/tagged artist %s)" %
                      (self.getPath(), tagTitle, self.dirArtist))
            else:
                print("%s: no file artist parsed (directory/tagged artist %s)" % (self.getPath(), self.dirArtist))
        else:
            print("%s: file artist %s differs from directory/tagged artist %s" % (self.getPath(), self.fileArtist, self.dirArtist))