def logInitialState(self):
    """Log the file path of every track in the release.

    Each path is passed to log() with "Debugging" as the second argument.

    TODO: Complete this function (it does not yet log tags or MusicDNS
    results) and use it, or remove it."""

    for currentTrack in self.release.tracks:
        message = "File path: %s" % currentTrack.filePath
        log(message, "Debugging")
def handleDirectory(directoryPath):
    """Process one directory according to its contents and enabled actions.

    The files in directoryPath are grouped by type, then each enabled action
    from configuration.ACTIONS is applied in a fixed order. Extracting
    archives hands control to traverse(); converting or splitting audio
    re-runs this function on the same directory."""

    pathsByType = functions.getFilePathsByType(directoryPath)

    def wanted(action, fileType):
        """True when the action is enabled and files of that type exist."""
        return configuration.ACTIONS[action] and fileType in pathsByType

    # Rename/delete image(s)
    if wanted("IMAGE", "image"):
        clean.handleImages(pathsByType["image"])

    # Delete extra files
    if wanted("CLEAN", "other"):
        clean.cleanDir(pathsByType["other"])

    # Extract archives; there may be new subdirectories, so traverse again.
    if wanted("EXTRACT", "archive"):
        extract.extract(pathsByType["archive"])
        return traverse(directoryPath)

    # Convert audio to Ogg and scan again
    if wanted("CONVERT", "bad_audio"):
        convert.convert(pathsByType["bad_audio"])
        return handleDirectory(directoryPath)

    # Continue only if audio is present
    if "good_audio" not in pathsByType:
        log("\nNo audio found in %s." % quote(directoryPath))
        # FIXME: If the user has not requested the metadata process, should this
        # directory actually be accepted rather than deleted at this point?
        if directoryPath != configuration.PATHS["CURRENT"]:
            functions.deleteItem(directoryPath)
        return

    # Split based on cue and scan again
    if wanted("SPLIT", "cue"):
        split.split(pathsByType["cue"], pathsByType["good_audio"])
        return handleDirectory(directoryPath)

    # Handle metadata
    if configuration.ACTIONS["METADATA"]:
        audioPaths = clean.standardizeFilenames(pathsByType["good_audio"])
        metadata.handleMetadata(directoryPath, audioPaths)
def writeResults(self):
    """Write the gathered tags to every track and rename its file.

    Runs a flow-control checkpoint once up front and a pause-only
    checkpoint before each track."""

    flowcontrol.checkpoint()
    for currentTrack in self.release.tracks:
        flowcontrol.checkpoint(pauseOnly=True)
        currentTrack.writeTags()
        currentTrack.rename()
        # Spacer line after each track's output.
        log(" ")
def logResults(self, results):
    """Log the getter results in a tabular format.

    results is a sequence of (candidate, weight, getterName, fileName)
    tuples. One row is logged per tuple: the getter name with its first
    three characters dropped, the file name, and the candidate (or an empty
    string when the getter returned nothing).

    If results is empty only the header is logged; previously max() raised
    ValueError on the empty sequence."""

    log("\n\n\n%s Results:" % self.fieldName.upper())
    if not results:
        return

    # Column widths come from the longest entry in each column.
    maxGetter = max(len(getter) for _, _, getter, _ in results)
    maxFilename = max(len(fileName) for _, _, _, fileName in results)

    for (candidate, _weight, getter, fileName) in results:
        log(" %s%s%s" % (getter[3:].ljust(maxGetter),
                         fileName.ljust(maxFilename + 3),
                         candidate if candidate else ""))
def handleMetadata(directoryPath, audioFilePaths):
    """Build a ReleaseManager for the directory and run it.

    If the manager raises ReleaseManagerError the failure is logged and the
    directory is rejected."""

    manager = Manager.ReleaseManager(directoryPath, audioFilePaths)
    try:
        manager.run()
    except Manager.ReleaseManagerError as error:
        log("\nCould not identify and tag audio.")
        log(str(error))
        functions.rejectItem(directoryPath)
def findConsensus(self, data):
    """Take data from getters and find the value with the highest score.

    data is a sequence of (candidate, weight, getterName, fileName) tuples;
    only the candidate and weight are used here.

    Candidates that differ by only capitalization and punctuation are
    grouped together for scoring. Example:

    If the scores come back
        5 Chick Corea
        4 Song of Singing
        3 Song Of Singing
    then the "song of singing" group will win with 7 points and the
    "Song of Singing" candidate will be chosen because it is the highest
    scoring candidate of that group.

    Returns the winning candidate, or None if no getter returned a value."""

    flowcontrol.checkpoint()

    def groupKey(candidate):
        """Normalize a candidate so near-identical spellings share a group."""
        return restrictChars(candidate, punctuation=False).lower()

    # Create dicts of values to sums of weights while removing null results.
    scores = {}
    groupScores = {}
    for (candidate, weight, _name, _track) in data:
        if candidate:
            group = groupKey(candidate)
            scores[candidate] = scores.get(candidate, 0) + weight
            groupScores[group] = groupScores.get(group, 0) + weight

    # Ensure that we have data, otherwise return None indicating failure.
    if not scores:
        log("Unable to find consensus -- no getters returned valid results.")
        return None

    # Rank the groups and the candidates, highest score first.
    groups = [(score, group) for group, score in groupScores.items()]
    groups.sort(reverse=True)
    candidates = [(score, candidate) for candidate, score in scores.items()]
    candidates.sort(reverse=True)

    # Display candidates (and groups, if different).
    log("Candidates:")
    for score, candidate in candidates:
        log(" %s %s" % (str(score).rjust(4), candidate))

    if len(groups) != len(candidates):
        log("\nGroups:")
        for score, group in groups:
            log(" %s %s" % (str(score).rjust(4), group))

    # Pick the highest-scoring member of the winning group.
    topGroup = groups[0][1]
    for score, candidate in candidates:
        if groupKey(candidate) == topGroup:
            return candidate

    # Every group key was derived from one of the candidates, so this point
    # should not be reached. The original code returned an undefined name
    # here, which would have raised NameError; report failure instead.
    return None
def getMBTag(self, track):
    """Ask MusicBrainz to fuzzily match the value currently in the tag.

    Returns None (after logging) when the file has no tag for this field."""

    currentTag = tagging.getTag(track.filePath, self.fieldName)
    if currentTag:
        return mb.askMB(self.fieldName, currentTag, track)

    log("Unable to match because current tag is empty.")
    return None
def getMBFilenameKnownData(self, track):
    """Match the file name against MB, filtered by already known data.

    Requires both the release and the track number to be known; otherwise
    a message is logged and None is returned."""

    known = track.metadata
    if "release" not in known or "tracknumber" not in known:
        log("The currently known data does not include the fields we need --"
            " the release and track number.")
        return None

    # Here we're searching for a track.
    fileName = self.getFilenameForMB(track)
    return mb.askMB(self.fieldName, fileName, track,
                    ["release", "tracknumber", "artist"])
def standardizeFilenames(itemPaths, rename=True):
    """Take file or dir paths; rename them to meet standards; return new paths.

    The rules applied by this function, in order:
        Spaces - not underscores or periods - should separate words.
        There should never be more than one consecutive space.
        Names should not begin or end in a space.
        Recognized audio extensions should be lowercase.

    Underscores and periods are only converted when the name contains no
    space at all. Underscores take precedence; periods are only considered
    when there are no underscores. Periods are left alone when the name looks
    like an acronym: splitting on "." yields only single-character pieces
    (ignoring one trailing empty piece caused by a final period). Otherwise
    every "lone period" (one not preceded or followed by another period) is
    replaced by a space, which keeps ellipses intact.

    An extension that is not a known good-audio extension is treated as part
    of the name. The list is updated in place and also returned. When rename
    is false nothing is moved on disk, but the paths are still updated."""

    for position, oldPath in enumerate(itemPaths):
        parentPath, oldName = os.path.split(oldPath)
        stem, suffix = os.path.splitext(oldName)

        # Unknown "extensions" belong to the name itself.
        if suffix.lower() not in configuration.typeToExts["good_audio"]:
            stem, suffix = stem + suffix, ""

        if " " not in stem:
            if "_" in stem:
                stem = stem.replace("_", " ")
            elif "." in stem:
                pieces = stem.split(".")
                if not pieces[-1]:
                    pieces.pop()
                looksLikeAcronym = all([len(piece) == 1 for piece in pieces])
                if not looksLikeAcronym:
                    stem = " ".join(re.split(r"(?<!\.)\.(?!\.)", stem))

        # Replace 2 or more spaces with 1, then trim the ends.
        stem = re.sub(" {2,}", " ", stem).strip()

        newName = stem + suffix.lower()
        newPath = os.path.join(parentPath, newName)
        if newPath == oldPath:
            continue

        log("Renaming %s to %s." % (quote(oldName), quote(newName)))
        if rename:
            shutil.move(oldPath, newPath)
        itemPaths[position] = newPath

    return itemPaths
def parseResult(result, field):
    """Extract the requested field from a MusicBrainz result object.

    Supported combinations:
        ReleaseResult: artist, release, date (year only), tracktotal
        TrackResult:   title, tracknumber
        ArtistResult:  artist
        model.Track:   always the track title, whatever the field
        model.Release: tracknumber

    Counts and track numbers are returned as two-character, zero-padded
    unicode strings. When nothing could be extracted a message is logged.
    The value (possibly None) is passed through toUnicode() before being
    returned."""

    def twoDigits(number):
        """Format a number as a zero-padded, two-character unicode string."""
        return unicode(number).rjust(2, u"0")

    finalResult = None

    if isinstance(result, musicbrainz2.wsxml.ReleaseResult):
        if field == "artist":
            finalResult = result.getRelease().getArtist().getName()
        elif field == "release":
            finalResult = result.getRelease().getTitle()
        elif field == "date":
            earliest = result.getRelease().getEarliestReleaseDate()
            if earliest:
                # Keep only the year portion of the date.
                finalResult = earliest.split("-")[0]
        elif field == "tracktotal":
            finalResult = twoDigits(result.getRelease().getTracksCount())

    elif isinstance(result, musicbrainz2.wsxml.TrackResult):
        if field == "title":
            finalResult = result.getTrack().getTitle()
        elif field == "tracknumber":
            # Track offsets are zero-indexed.
            firstRelease = result.getTrack().getReleases()[0]
            finalResult = twoDigits(firstRelease.getTracksOffset() + 1)

    elif isinstance(result, musicbrainz2.wsxml.ArtistResult):
        if field == "artist":
            finalResult = result.getArtist().getName()

    # Why would we ever get a Track here instead of a TrackResult?
    elif isinstance(result, musicbrainz2.model.Track):
        finalResult = result.getTitle()

    # HACK: This is used when matching tracknumbers, because if this function
    # received a TrackResult it would look only at the 0-th release, which may
    # not be correct.
    elif isinstance(result, musicbrainz2.model.Release):
        if field == "tracknumber":
            finalResult = twoDigits(result.getTracksOffset() + 1)

    if not finalResult:
        log("Something went wrong in parseResult. Result type: %s field: %s"
            % (result.__class__, field))

    return toUnicode(finalResult)
def parseResult(result, field):
    """Pull the data for one field out of a MusicBrainz result.

    The result may be a wsxml ReleaseResult, TrackResult or ArtistResult, or
    a bare model Track or Release. Each type only answers certain fields;
    any other combination leaves the value as None, which is logged. Track
    numbers and track totals are zero-padded to two characters. The value
    is returned after being passed through toUnicode()."""

    wanted = field
    value = None

    if isinstance(result, musicbrainz2.wsxml.ReleaseResult):
        if wanted == "artist":
            value = result.getRelease().getArtist().getName()
        elif wanted == "release":
            value = result.getRelease().getTitle()
        elif wanted == "date":
            releaseDate = result.getRelease().getEarliestReleaseDate()
            if releaseDate:
                # The year is the part before the first dash.
                value = releaseDate.split("-")[0]
        elif wanted == "tracktotal":
            total = result.getRelease().getTracksCount()
            value = unicode(total).rjust(2, u"0")
    elif isinstance(result, musicbrainz2.wsxml.TrackResult):
        if wanted == "title":
            value = result.getTrack().getTitle()
        elif wanted == "tracknumber":
            # Track offsets are zero-indexed, hence the + 1.
            offset = result.getTrack().getReleases()[0].getTracksOffset()
            value = unicode(offset + 1).rjust(2, u"0")
    elif isinstance(result, musicbrainz2.wsxml.ArtistResult):
        if wanted == "artist":
            value = result.getArtist().getName()
    # Why would we ever get a Track here instead of a TrackResult?
    elif isinstance(result, musicbrainz2.model.Track):
        value = result.getTitle()
    # HACK: This is used when matching tracknumbers, because if this function
    # received a TrackResult it would look only at the 0-th release, which may
    # not be correct.
    elif isinstance(result, musicbrainz2.model.Release):
        if wanted == "tracknumber":
            value = unicode(result.getTracksOffset() + 1).rjust(2, u"0")

    if not value:
        log("Something went wrong in parseResult. Result type: %s field: %s"
            % (result.__class__, field))

    return toUnicode(value)
def getMBNumTracksInDir(self, track):
    """Check the number of tracks in the directory against MusicBrainz.

    Requires the release to be known. A non-empty answer is zero-filled to
    at least two characters."""

    if "release" not in track.metadata:
        log("The currently known data does not include the field we need --"
            " the release.")
        return None

    numTracks = self.getNumTracksInDir(track)
    answer = mb.askMB(self.fieldName, numTracks, track,
                      ["release", "artist", "date", "tracktotal"])
    return answer.zfill(2) if answer else answer
def clearTags(filePath):
    """Remove all tags from the file.

    For Ogg files deleting the tags is all that happens. For anything else
    the file is reopened, ID3 tags are added back, the tracknumber is set to
    u"00" and the file is saved; an ID3 error during that step is logged."""

    openAudioFile(filePath).delete()

    # If this is an Ogg, then we've done everything we need to do.
    if ext(filePath) == ".ogg":
        return

    reopened = openAudioFile(filePath)
    try:
        reopened.add_tags(ID3=EasyID3)
        reopened["tracknumber"] = u"00"
        reopened.save()
    except mutagen.id3.error:
        log("There was an error clearing the old ID3 tags.")
def openAudioFile(filePath):
    """Return an MP3 or Ogg Mutagen object chosen by the file's extension.

    Raises NotImplementedError (after logging) for any other extension.
    Returns None (after logging) when HeaderNotFoundError is raised while
    opening the file."""

    extension = ext(filePath)

    if extension not in (".mp3", ".ogg"):
        log("Cannot access %s tags. File must be an MP3 or Ogg."
            % quote(filePath))
        # TODO: There's no good reason not to support other formats.
        # Mutagen supports lots of formats.
        raise NotImplementedError

    try:
        if extension == ".mp3":
            return MP3(filePath, ID3=EasyID3)
        return Ogg(filePath)
    except HeaderNotFoundError:
        log("Could not open %s. File seems corrupted." % quote(filePath))
        return None
def getMBFilenameKnownData(self, track):
    """Fuzzily match the release name from the file path using MusicBrainz.

    The folder and file name are searched for the release name. At least
    one of artist, date or title must already be known; otherwise a message
    is logged and None is returned."""

    known = track.metadata
    if ("artist" not in known and "date" not in known
            and "title" not in known):
        log("The currently known data does not include the fields we need --"
            " the artist or the date or the track titles.")
        return None

    folderFilePath = self.getFilenameForMB(track)
    return mb.askMB(self.fieldName, folderFilePath, track,
                    ["artist", "date", "tracks", "tracktotal"])
def rename(self):
    """Rename the file to "[tracknumber] - [title].[ext]".

    The track number is padded to two characters and the title is passed
    through translateForFilename(). The file stays in its directory and
    keeps its extension.

    Raises ReleaseManagerError if a different file already exists at the
    target path."""

    newBaseName = self.metadata["tracknumber"].rjust(2, u"0")
    newBaseName += " - " + translateForFilename(self.metadata["title"])
    oldBaseName, ext = os.path.splitext(self.fileName)

    # Build the target from the directory and the new file name. The previous
    # str.replace() on the whole path also rewrote any directory component
    # that happened to contain the old base name (e.g. "/music/01/01.mp3").
    directoryPath = os.path.dirname(self.filePath)
    newPath = os.path.join(directoryPath, newBaseName + ext)

    if not os.path.exists(newPath):
        log("Renaming %s to %s." % (quote(oldBaseName), quote(newBaseName)))
        shutil.move(self.filePath, newPath)
        # NOTE(review): self.filePath and self.fileName are not updated here;
        # confirm whether they are recomputed elsewhere after the move.
    elif newBaseName == oldBaseName:
        log("Old filename is correct. No renaming necessary.")
    else:
        log("Cannot rename %s." % quote(oldBaseName))
        log("There already exists a file with the target filename.")
        log("Target filename: %s" % quote(newBaseName))
        raise ReleaseManagerError("Target filename already exists.")
def getMBKnownData(self, track):
    """Query MB using only data that is already known.

    The title can be found from the release and a tracknumber.
        Need: release, tracknumber
        Can Use: date, artist, tracktotal

    Returns None (after logging) when a needed field is missing."""

    known = track.metadata
    if "release" not in known or "tracknumber" not in known:
        log("The currently known data does not include the fields we need --"
            " the release and track number.")
        return None

    # We can use these extra fields because we are searching for a
    # release, not a track.
    filterFields = ["release", "tracknumber", "date", "artist", "tracktotal"]
    return mb.askMB(self.fieldName, None, track, filterFields)
def getMBKnownData(self, track):
    """Query MB for the date using only data that is already known.

        Need: A release
        Can Use: An artist, a tracktotal
        Might Use: Tracknames

    Returns None (after logging) when the release is not known."""

    if "release" in track.metadata:
        return mb.askMB("date", None, track,
                        ["release", "artist", "tracktotal"])

    log("The currently known data does not include the field we need --"
        " the release.")
    return None
def getMBKnownData(self, track):
    """Query MB using only data that is already known.

        Need: Artist AND (Date OR Titles)
        Can Use: Tracktotal

    NOTE(review): the previous docstring said "To find a date", but a date
    is one of the inputs here and the filter list matches the release
    getters -- presumably this looks up the release; confirm.

    Returns None (after logging) when the needed fields are missing."""

    known = track.metadata
    hasArtist = "artist" in known
    hasDateOrTitle = "date" in known or "title" in known
    if not (hasArtist and hasDateOrTitle):
        log("The currently known data does not include the fields we need --"
            " the artist and the (date or track titles).")
        return None

    return mb.askMB(self.fieldName, None, track,
                    ["artist", "date", "tracks", "tracktotal"])
def getMBTagKnownData(self, track):
    """Query MB with the current title tag, filtered by known data.

    Returns None (after logging) when the title tag is empty or when the
    release and track number are not both known."""

    titleTag = tagging.getTag(track.filePath, "title")
    if not titleTag:
        log("The current tag is empty.")
        return None

    known = track.metadata
    if "release" not in known or "tracknumber" not in known:
        log("The currently known data does not include the fields we need --"
            " the release and track number.")
        return None

    # Here we're searching for a track.
    return mb.askMB(self.fieldName, titleTag, track,
                    ["release", "tracknumber", "artist"])
def getMBKnownData(self, track):
    """Query MB for the tracktotal using only data that is already known.

        Need: A release
        Can Use: A date, an artist

    A non-empty answer is zero-filled to at least two characters. Returns
    None (after logging) when the release is not known."""

    if "release" not in track.metadata:
        log("The currently known data does not include the field we need --"
            " the release.")
        return None

    total = mb.askMB(self.fieldName, None, track,
                     ["release", "artist", "date"])
    if not total:
        return total
    return total.zfill(2)
def getMBKnownData(self, track):
    """Query MB for the tracknumber using only data that is already known.

        Need: A track title
        Can Use: A release, an artist

    A non-empty answer is zero-filled to at least two characters. Returns
    None (after logging) when the title is not known."""

    if "title" not in track.metadata:
        log("The currently known data does not include the field we need --"
            " the track title.")
        return None

    number = mb.askMB(self.fieldName, None, track,
                      ["title", "artist", "release"])
    return number.zfill(2) if number else number
def getMBTagKnownData(self, track):
    """Query MB with the current artist tag, filtered by known data.

    Returns None (after logging) when the artist tag is empty or the
    release is not known."""

    artistTag = tagging.getTag(track.filePath, "artist")
    if not artistTag:
        log("The current tag is empty.")
        return None

    if "release" not in track.metadata:
        log("The currently known data does not include the field we need --"
            " the release.")
        return None

    return mb.askMB(self.fieldName, artistTag, track,
                    ["release", "date", "tracktotal"])
def getMBKnownData(self, track):
    """Query MB using only data that is already known.

    The artist can be found from the release.
        Need: release
        Can Use: date, tracktotal
        Might Use: tracks

    Returns None (after logging) when the release is not known."""

    if "release" in track.metadata:
        filterFields = ["release", "date", "tracktotal", "tracks"]
        return mb.askMB(self.fieldName, None, track, filterFields)

    log("The currently known data does not include the field we need --"
        " the release.")
    return None
def getMBPUID(puid, field):
    """Look up a PUID on MusicBrainz and return the artist or the title.

    Returns None when there is no PUID, when the lookup yields nothing, or
    when field is neither "artist" nor "title"."""

    if not puid:
        log("Cannot perform lookup because we never found a PUID.")
        return None

    query = mbws.Query()
    trackFilter = mbws.TrackFilter(puid=puid, limit=1)
    found = contactMB(query.getTracks, [trackFilter])
    if not found:
        log("MusicBrainz did not recognize the PUID.")
        return None

    if field not in ("artist", "title"):
        return None

    # Only the first (best) hit is consulted.
    bestTrack = found[0].getTrack()
    if field == "artist":
        return bestTrack.getArtist().getName()
    return bestTrack.getTitle()
def getMBTagKnownData(self, track):
    """Query MB with the current release tag, filtered by known data.

    Returns None (after logging) when the release tag is empty or when none
    of artist, date and title is known."""

    releaseTag = tagging.getTag(track.filePath, "release")
    if not releaseTag:
        log("The current tag is empty.")
        return None

    known = track.metadata
    if ("artist" not in known and "date" not in known
            and "title" not in known):
        log("The currently known data does not include the fields we need --"
            " the artist or the date or the track titles.")
        return None

    return mb.askMB(self.fieldName, releaseTag, track,
                    ["artist", "date", "tracks", "tracktotal"])
def getMBFilenameKnownData(self, track):
    """Match the track number from the filename against MB using known data.

    Returns None (after logging) when the filename holds no track number or
    the title is not known. A non-empty answer is zero-filled to at least
    two characters."""

    tracknumber = self.getFilename(track)
    if not tracknumber:
        log("Unable to match because no track number was found in filename.")
        return None

    if "title" not in track.metadata:
        log("The currently known data does not include the fields we need --"
            " the track title.")
        return None

    answer = mb.askMB(self.fieldName, tracknumber, track,
                      ["title", "artist", "release", "tracknumber"])
    return answer.zfill(2) if answer else answer
def getMBTagKnownData(self, track):
    """Query MB with the current tracknumber tag, filtered by known data.

    Returns None (after logging) when the tag is empty or the title is not
    known. A non-empty answer is zero-filled to at least two characters."""

    tracknumberTag = tagging.getTag(track.filePath, "tracknumber")
    if not tracknumberTag:
        log("The current tag is empty.")
        return None

    if "title" not in track.metadata:
        log("The currently known data does not include the fields we need --"
            " the track title.")
        return None

    answer = mb.askMB(self.fieldName, tracknumberTag, track,
                      ["title", "artist", "release", "tracknumber"])
    if answer:
        answer = answer.zfill(2)
    return answer
def getMBFilenameKnownData(self, track):
    """Check the filename's track number against MB using known data.

    The track number comes from self.getFilename(). Without a track number
    or a known title, a message is logged and None is returned. Otherwise
    the MB answer is returned, zero-filled to two characters when it is
    non-empty."""

    numberFromName = self.getFilename(track)

    if not numberFromName:
        log("Unable to match because no track number was found in filename.")
        return None
    if "title" not in track.metadata:
        log("The currently known data does not include the fields we need --"
            " the track title.")
        return None

    filterFields = ["title", "artist", "release", "tracknumber"]
    matched = mb.askMB(self.fieldName, numberFromName, track, filterFields)
    if not matched:
        return matched
    return matched.zfill(2)
def gatherMetadata(self):
    """Run the queued Finders in rounds until all succeed or none progress.

    Each round runs every finder in self.queue. Finders that fail are queued
    for the next round. If a round ends with the same finders still queued,
    ReleaseManagerError is raised naming the undetermined fields."""

    while self.queue:
        for finder in self.queue:
            field = finder.fieldName
            header = ("\nAttempting to determine the %s using %d sources."
                      % (field, len(finder.getters)))
            with logSection(header):
                success = finder.run(self.release)

            if success:
                log("Successfully determined the %s.\n" % field)
            else:
                log("Failed to determine the %s. Will try again next round.\n"
                    % field)
                self.nextRoundQueue.append(finder)

        if self.queue != self.nextRoundQueue:
            # Some finder succeeded; retry only the ones that failed.
            self.queue = self.nextRoundQueue
            self.nextRoundQueue = []
            continue

        log("No progress has been made. The metadata gathering process "
            "has failed.\n")
        failedFields = [finder.fieldName for finder in self.queue]
        raise ReleaseManagerError("Unable to determine: %s" % failedFields)
def gatherMetadata(self):
    """Keep running the pending Finders until success or stagnation.

    Finders that fail in a round are collected in self.nextRoundQueue and
    retried in the following round. A round in which every queued finder
    fails raises ReleaseManagerError listing the fields still unknown."""

    while self.queue:
        for pending in self.queue:
            fieldName = pending.fieldName
            sourceCount = len(pending.getters)
            with logSection(
                    "\nAttempting to determine the %s using %d sources."
                    % (fieldName, sourceCount)):
                succeeded = pending.run(self.release)

            if not succeeded:
                log("Failed to determine the %s. Will try again next round.\n"
                    % fieldName)
                self.nextRoundQueue.append(pending)
            else:
                log("Successfully determined the %s.\n" % fieldName)

        stalled = self.queue == self.nextRoundQueue
        if stalled:
            log("No progress has been made. The metadata gathering process "
                "has failed.\n")
            failedFields = [pending.fieldName for pending in self.queue]
            raise ReleaseManagerError(
                "Unable to determine: %s" % failedFields)

        # Swap in the finders that still need another attempt.
        self.queue, self.nextRoundQueue = self.nextRoundQueue, []
def run(self, release):
    """Gather this field's data for every track and settle on a consensus.

    For each track every (getter, weight) pair is called, the results are
    logged and a consensus is computed. If any track has no consensus the
    method returns False without storing anything. Otherwise each consensus
    is stored on its track and True is returned."""

    decided = []
    for track in release.tracks:
        with logSection("Attempting to determine %s for %s."
                        % (self.fieldName, quote(track.fileName))):
            data = []
            for (getter, weight) in self.getters:
                flowcontrol.checkpoint()
                log(" ")
                candidate = getter(track)
                row = (candidate, weight, getter.__name__,
                       quote(track.fileName))
                data.append(row)

            self.logResults(data)
            consensus = self.findConsensus(data)
            log("\n\n\n")

        if not consensus:
            return False
        decided.append((track, consensus))

    log(" ")

    # Only store once every track has a consensus.
    for (track, consensus) in decided:
        track.storeData(self.fieldName, consensus)
    return True
def getMBKnownData(self, track):
    """Ask MB for this field using nothing but already known data.

    The title can be found from the release and a tracknumber.
        Need: release, tracknumber
        Can Use: date, artist, tracktotal

    When either needed field is missing, a message is logged and None is
    returned."""

    haveRelease = "release" in track.metadata
    if not (haveRelease and "tracknumber" in track.metadata):
        log("The currently known data does not include the fields we need --"
            " the release and track number.")
        return None

    # We can use these extra fields because we are searching for a
    # release, not a track.
    return mb.askMB(
        self.fieldName, None, track,
        ["release", "tracknumber", "date", "artist", "tracktotal"])
def executeQuery(field, match, preFilter, postFilter):
    """Run one MusicBrainz query from start to finish.

    Picks the query function and filter for the field, applies the
    pre-filter parameters, contacts MusicBrainz, post-processes the results
    with postFilter passed as keyword arguments, and parses the requested
    field out of the selected result.

    Returns the parsed value, or None when no usable result was found."""

    query, queryFunction, queryFilter = getFunctionAndFilter(field, match)
    queryFilter = applyParams(queryFilter, preFilter, match)

    # Log the inputs, skipping the ones that are empty.
    log("Field: %s" % field)
    optionalLines = (("Pre: %s", preFilter),
                     ("Post: %s", postFilter),
                     ("Match: %s", match))
    for template, value in optionalLines:
        if value:
            log(template % value)

    finalResult = None
    rawResults = contactMB(queryFunction, [queryFilter])
    usableResults = requireDesiredInfo(field, rawResults)
    if usableResults:
        chosen = postProcessResults(usableResults, field, **postFilter)
        if chosen:
            finalResult = parseResult(chosen, field)

    log("Result: %s\n" % finalResult)
    return finalResult
def writeTags(self):
    """Wipe the track's existing tags, then write every gathered field."""

    log("Clearing current tags.")
    tagging.clearTags(self.filePath)

    log("Writing these tags:")
    for fieldName in self.metadata:
        value = self.metadata[fieldName]
        log(" %s%s" % (fieldName.ljust(20), value))
        tagging.setTag(self.filePath, fieldName, self.metadata[fieldName])
def contactMB(func, params, depth=0):
    """Robustly connect to MusicBrainz through the MB WebService.

    Calls func(*params) and returns its result. If the call raises, the
    error is logged and the call is retried, sleeping depth * 2 seconds
    before each attempt, for up to three retries after the initial attempt.
    Returns None once the retries are exhausted.

    depth is the current retry count; callers normally leave it at 0.

    The previous version computed the result but never returned it, so
    every caller received None."""

    time.sleep(depth * 2)

    try:
        return func(*params)
    except Exception as e:
        # Any failure here is treated as transient; this is the retry
        # boundary for all web-service calls.
        if depth < 3:
            log("Received error: %s." % quote(str(e)))
            log("Waiting, then trying again.")
            return contactMB(func, params, depth + 1)

        log("Failed 3 times. Returning None.")
        return None
def convert(audioFilePaths):
    """Convert undesirable audio formats into ogg.

    Takes a list of audio files and converts each to ogg using the command
    sequence registered for its extension in convertorCommands. In every
    command argument "$$" is replaced by the file path without extension.
    These commands (mac, oggenc, mpc123) must be present.

    A file whose conversion fails (command missing or non-zero exit status)
    is rejected; a successfully converted file is deleted. When more than
    one command was needed, the intermediate wav file is deleted as well."""

    for audioFilePath in audioFilePaths:
        fileName = os.path.basename(audioFilePath)
        with logSection("Converting %s." % quote(fileName)):
            filePathWithoutExtension, extension = os.path.splitext(
                audioFilePath)
            commands = convertorCommands[extension]

            success = True
            for command in commands:
                cmd = [arg.replace("$$", filePathWithoutExtension)
                       for arg in command]
                log(" ".join(cmd))

                try:
                    p = subprocess.Popen(cmd)
                    p.wait()
                except OSError:
                    # The format string used to read "% command not found.",
                    # which parses as a %c conversion and raises TypeError
                    # for a multi-character command name.
                    log("%s command not found." % cmd[0])
                    success = False
                    break

                if p.returncode != 0:
                    success = False
                    break

            if not success:
                # FIXME: Should we reject this file or this entire directory?
                log("Unable to convert %s." % quote(fileName))
                functions.rejectItem(audioFilePath)
            else:
                functions.deleteItem(audioFilePath)

            # If we created an intermediate wav file
            if len(commands) > 1:
                functions.deleteItem(filePathWithoutExtension + ".wav", True)
def extract(archivePaths):
    """Extract each archive into a directory named after it.

    For every archive path:
        A directory with the archive's name minus its extension is created
        if it does not exist yet.
        The command template is looked up in extractorCommands by the
        lowercased extension; "$a" arguments become the archive path and
        "$d" arguments become the destination directory.
        The command is run. If it cannot be started or exits non-zero, the
        destination directory is deleted and the archive rejected.
        Otherwise the archive itself is deleted."""

    for archivePath in archivePaths:
        archiveName = os.path.basename(archivePath)
        with logSection("Extracting %s." % quote(archiveName)):
            targetDirectory, extension = os.path.splitext(archivePath)
            if not os.path.exists(targetDirectory):
                os.mkdir(targetDirectory)

            # Fill the placeholders in a fresh copy of the command template.
            placeholders = {"$a": archivePath, "$d": targetDirectory}
            command = [placeholders.get(arg, arg)
                       for arg in extractorCommands[extension.lower()]]
            log(" ".join(command))

            try:
                process = subprocess.Popen(command)
                process.wait()
            except OSError:
                log("%s command not found." % command[0])
                success = False
            else:
                success = process.returncode == 0

            if success:
                functions.deleteItem(archivePath)
            else:
                log("Unable to extract %s." % quote(archivePath))
                functions.deleteItem(targetDirectory)
                functions.rejectItem(archivePath)
def findFuzzyMatch(field, match, track, preFilter, postFilter):
    """Fuzzily match unreliable data (from tags and filename) to MusicBrainz.

    Tags and filenames especially may contain special characters or
    extraneous data which will make a MusicBrainz search fail. This function
    removes special characters and, if the full string does not match, splits
    it based on a delimiters list and tries the substrings.

    Example:

    Filename: "2000 -*- The Better Life (Advance) -[[EAK-group]]-"
    Initial search for full string fails. String is broken into substrings.
    Substrings: "2000", "The Better Life", "Advance", "EAK", "group"
    Without any other filters "The Better Life" and "Advance" will both match
    and unable to choose one over the other, we will fail.
    With a filter (like the artist or date) then only "The Better Life" will
    match and the search will succeed.

    Fuzzy matching is only used for artist, release and title fields, because
    these are the only fields with strings to fuzzily match against.

    Returns the single surviving match, or u"" when nothing matched or the
    matches could not be narrowed down to one."""

    if isinstance(match, FilepathString):
        log("Splitting path into directory and file name, then trying each.")
        dirName, fileName = os.path.split(match)

        dirResult = executeQuery(field, dirName, preFilter, postFilter)
        if dirResult:
            # We won't look to see if the filename matches, because even if it
            # did, the directory generally has better odds of containing
            # an artist or release anyway. (We know we are looking for an
            # artist or release, because only requests for those fields pass
            # in a filepath. Track title requests just pass in the file name.)
            return dirResult

        fileResult = executeQuery(field, fileName, preFilter, postFilter)
        if fileResult:
            return fileResult
    else:
        result = executeQuery(field, match, preFilter, postFilter)
        if result:
            return result

    delimiters = r"[/()\-~+_\[\]\{\}*]"
    substrings = [part.strip() for part in re.split(delimiters, match)
                  if part.strip()]

    log("MB did not find a match for the full string.")
    log("Searching for a match in substrings.")
    log("Substrings: %s\n" % substrings)

    matches = set()
    whatFromWhere = {}   # result -> the substring that produced it
    for substring in substrings:
        result = executeQuery(field, substring, preFilter, postFilter)
        if result:
            whatFromWhere[result] = substring
            matches.add(result)

    def prune(looksBogus):
        """Drop matches flagged by looksBogus, but never the last one."""
        for candidate in matches.copy():
            if len(matches) <= 1:
                break
            if looksBogus(candidate):
                matches.remove(candidate)

    if len(matches) > 1:
        # If we have more than one result, attempt to remove matches which
        # probably are not correct until we have only one match left or we run
        # out of methods for removing bogus results.
        # Potentially bogus results are removed in the order of the likelihood
        # that they are incorrect.
        #
        # The current filters (in order):
        # - result is very different from substring
        # - result looks like tracknumber or year
        # - result is digits
        # - result is (about) equal to already known artist, release or title
        # - substring was digits
        # TODO:
        # Order tests correctly.
        # Use difflib in addition to aboutEqual.
        # Use two levels of delimiters.
        # Add filter to remove results which are (about) equal to one another.
        #
        # Order #1 (filter all results, filter all substring)
        # - result looks like tracknumber or year
        # - result is digits
        # - result is (about) equal to already known artist, release or title
        #
        # - substring looked like tracknumber or year
        # - substring was digits
        # - substring was (about) equal artist, release, title
        #
        # Order #2 (filter result then substring, then next filter)
        # - result looks like tracknumber or year
        # - substring looked like tracknumber or year
        #
        # - result is digits
        # - substring was digits
        #
        # - result is (about) equal to already known artist, release or title
        # - substring was ... artist, release, title

        log("Multiple substrings matched: %s" % matches)
        log("Removing matches which are probably wrong.")

        # Remove matches which are either a tracknumber or a year.
        # Tracknumbers are identified by being one or two digits (possibly
        # with leading zero) under 99.
        # Years are four consecutive digits between 1600 and current year + 1.
        def isNumberOrYear(candidate):
            if not candidate.isdigit():
                return False
            num = int(candidate)
            return functions.isTrackNumber(num) or functions.isDate(num)

        prune(isNumberOrYear)

        # Remove matches which are just digits.
        prune(lambda candidate: candidate.isdigit())

        # Remove results which came from strings of digits.
        prune(lambda candidate: whatFromWhere[candidate].isdigit())

        # If we still have more than one result, then we will remove values
        # that are known to be correct for a different field. In particular,
        # we'll look at the artist, album and title fields and remove matches
        # equivalent to those fields - in that order.
        #
        # The loop variable must not be called "field": the original code
        # reused that name, which clobbered the parameter and made the final
        # log message name the wrong field. Filtering instead of
        # list.remove() also avoids a ValueError for any other field.
        relatedFields = [name for name in ("artist", "release", "title")
                         if name != field]
        relatedData = [track.metadata[name] for name in relatedFields
                       if name in track.metadata]

        # Remove matches which are the same as the already known artist,
        # release or title intelligently.
        # TODO: Figure out how to make TODOs highlighted in yellow.
        def equal(candidate, datum):
            return candidate == datum

        def inside(candidate, datum):
            return datum.lower() in candidate.lower()

        if len(matches) > 1:
            for datum in relatedData:
                for equivalenceFunc in (equal, aboutEqual, inside):
                    # The lambda is consumed immediately, so capturing the
                    # loop variables is safe.
                    prune(lambda candidate: equivalenceFunc(candidate, datum))

        # Remove matches which are significantly different than the substring
        # they came from.
        prune(lambda candidate: not difflib.get_close_matches(
            whatFromWhere[candidate], [candidate]))

    if len(matches) == 1:
        winner = matches.pop()
        log("MB matched a string to a %s: %s" % (field, quote(winner)))
        return winner
    else:
        log("%d substrings matched." % len(matches))
        if matches:
            log("Unable to select between them.")
            log("Filtered matches: %s" % matches)
        log("Fuzzy matching failed.")
        return u""