def test_findbestmatch(self):
    # The fallback entry must lose against the fixture's candidates: the
    # winner is expected to be 'b' at 0.75, out of four results in total.
    fallback = SimMatchTest(similarity=-1, name='no_match')
    outcome = find_best_match(self.candidates, fallback)

    self.assertEqual(outcome.result.name, 'b')
    self.assertEqual(outcome.similarity, 0.75)
    self.assertEqual(outcome.num_results, 4)
def _match_to_track(self, tracks, threshold=0):
    """Choose the entry of `tracks` that best matches this object's metadata.

    Each track is scored with `self.metadata.compare_to_track` (using
    `self.comparison_weights`) and the scores are handed to
    `find_best_match` together with a similarity -1 fallback.

    Returns None when the best similarity is below `threshold`. Otherwise
    returns `(track_id, release_group_id, release_id, acoustid, node)`:
    the two release ids are set only when the match carries a release, and
    `node` is the matched track itself only when there is no release and
    the track has a 'title' key. `acoustid` is None if the track lacks it.
    """
    def scored_tracks():
        # multiple matches -- calculate similarities to each of them
        for candidate in tracks:
            yield self.metadata.compare_to_track(candidate, self.comparison_weights)

    fallback = SimMatchTrack(similarity=-1, releasegroup=None, release=None, track=None)
    best_match = find_best_match(scored_tracks, fallback)
    if best_match.similarity < threshold:
        return None

    winner = best_match.result
    matched_track = winner.track
    track_id = matched_track['id']
    acoustid = matched_track.get('acoustid', None)

    release_group_id = release_id = node = None
    if winner.release:
        release_group_id = winner.releasegroup['id']
        release_id = winner.release['id']
    elif 'title' in matched_track:
        # No release attached: pass the track node itself along instead.
        node = matched_track
    return (track_id, release_group_id, release_id, acoustid, node)
def test_findbestmatch(self):
    # Expect candidate 'b' (similarity 0.75) to be selected from the four
    # results produced by self.candidates; the sentinel must not be used.
    sentinel = SimMatchTest(similarity=-1, name='no_match')
    picked = find_best_match(self.candidates, sentinel)

    self.assertEqual(picked.result.name, 'b')
    self.assertEqual(picked.similarity, 0.75)
    self.assertEqual(picked.num_results, 4)
def _match_files(files, tracks, unmatched_files, threshold=0, use_events_iter=False):
    """Match files to tracks on this album, based on metadata similarity or recordingid.

    Generator yielding one `(file, target)` pair for every file whose state
    is not `File.REMOVED`. A file with a valid recording id is paired with
    the cached track for that id (most specific key first). Otherwise it is
    paired with the best similarity match among `tracks` scoring at least
    `threshold`, falling back to `unmatched_files`.
    """
    # TODO: get rid of this completely at some point
    events_iter = process_events_iter if use_events_iter else (lambda seq: seq)

    SimMatchAlbum = namedtuple('SimMatchAlbum', 'similarity track')
    no_match = SimMatchAlbum(similarity=-1, track=unmatched_files)

    # Unknown keys read as None. Note that a defaultdict stores the default
    # on a missed lookup, so the mapping is non-empty after the first probe.
    tracks_cache = defaultdict(lambda: None)

    def build_tracks_cache():
        # Register every track under (recordingid, tracknumber, discnumber)
        # and under the two shorter prefixes of that key.
        for album_track in tracks:
            recording_id = album_track.orig_metadata['musicbrainz_recordingid']
            track_number = album_track.orig_metadata['tracknumber']
            disc_number = album_track.orig_metadata['discnumber']
            full_key = (recording_id, track_number, disc_number)
            for width in (3, 2, 1):
                tracks_cache[full_key[:width]] = album_track

    for current in list(files):
        if current.state == File.REMOVED:
            continue

        # if we have a recordingid to match against, use that in priority
        recid = current.match_recordingid or current.metadata['musicbrainz_recordingid']
        if recid and mbid_validate(recid):
            if not tracks_cache:
                build_tracks_cache()
            tracknumber = current.metadata['tracknumber']
            discnumber = current.metadata['discnumber']
            lookup_keys = (
                (recid, tracknumber, discnumber),
                (recid, tracknumber),
                (recid, ),
            )
            cached = None
            for key in lookup_keys:
                cached = tracks_cache[key]
                if cached:
                    break
            if cached:
                yield (current, cached)
                continue

        # try to match by similarity
        def candidates():
            for album_track in events_iter(tracks):
                score = album_track.metadata.compare(current.orig_metadata)
                if score >= threshold:
                    yield SimMatchAlbum(similarity=score, track=album_track)

        best_match = find_best_match(candidates, no_match)
        yield (current, best_match.result.track)
def test_findbestmatch_nomatch(self):
    # Clear the fixture values (presumably what self.candidates iterates
    # over) so the fallback entry is the only possible result.
    self.test_values = []
    fallback = SimMatchTest(similarity=-1, name='no_match')
    outcome = find_best_match(self.candidates, fallback)

    self.assertEqual(outcome.result.name, 'no_match')
    self.assertEqual(outcome.similarity, -1)
def test_findbestmatch_nomatch(self):
    # With the fixture values emptied (presumably the data behind
    # self.candidates), the sentinel itself must come back, with its own
    # similarity and a result count of zero.
    self.test_values = []
    sentinel = SimMatchTest(similarity=-1, name='no_match')
    picked = find_best_match(self.candidates, sentinel)

    self.assertEqual(picked.result.name, 'no_match')
    self.assertEqual(picked.similarity, -1)
    self.assertEqual(picked.num_results, 0)
def _match_to_release(self, releases, threshold=0):
    """Return the release that best matches this object's metadata.

    Each entry of `releases` is scored with
    `self.metadata.compare_to_release` using `Cluster.comparison_weights`;
    only scores of at least `threshold` are offered to `find_best_match`.
    The fallback passed along has `release=None`, so that is what comes
    back when the fallback ends up as the result.
    """
    def qualifying_matches():
        # multiple matches -- calculate similarities to each of them
        for candidate in releases:
            scored = self.metadata.compare_to_release(candidate, Cluster.comparison_weights)
            if scored.similarity >= threshold:
                yield scored

    fallback = SimMatchRelease(similarity=-1, release=None)
    return find_best_match(qualifying_matches, fallback).result.release
def _match_to_album(self, releases, threshold=0):
    """Return the id of the release that best matches this object's metadata.

    Every entry of `releases` is scored with
    `self.metadata.compare_to_release` using `Cluster.comparison_weights`
    and the scores are handed to `find_best_match`. Returns None when the
    best similarity is below `threshold`, otherwise the 'id' entry of the
    winning release.
    """
    def scored_releases():
        # multiple matches -- calculate similarities to each of them
        for candidate in releases:
            yield self.metadata.compare_to_release(candidate, Cluster.comparison_weights)

    fallback = SimMatchRelease(similarity=-1, release=None)
    best_match = find_best_match(scored_releases, fallback)
    if best_match.similarity < threshold:
        return None
    return best_match.result.release['id']
def _match_files(self, files, recordingid=None, threshold=0):
    """Match files to tracks on this album, based on metadata similarity or recordingid.

    Generator yielding one `(file, target)` pair for every file whose state
    is not `File.REMOVED`. A valid recording id (the `recordingid` argument,
    else the file's own) is looked up in a cache of `self.tracks`, most
    specific key first. Files not resolved that way are paired with the
    most similar track, or with `self.unmatched_files` when the best
    similarity is below `threshold`.
    """
    SimMatchAlbum = namedtuple('SimMatchAlbum', 'similarity track')

    # Unknown keys read as None. Note that a defaultdict stores the default
    # on a missed lookup, so the mapping is non-empty after the first probe.
    tracks_cache = defaultdict(lambda: None)

    def build_tracks_cache():
        # Register every track under (recordingid, tracknumber, discnumber)
        # and under the two shorter prefixes of that key.
        for album_track in self.tracks:
            recording_id = album_track.orig_metadata['musicbrainz_recordingid']
            track_number = album_track.orig_metadata['tracknumber']
            disc_number = album_track.orig_metadata['discnumber']
            full_key = (recording_id, track_number, disc_number)
            for width in (3, 2, 1):
                tracks_cache[full_key[:width]] = album_track

    for current in list(files):
        if current.state == File.REMOVED:
            continue

        # if we have a recordingid to match against, use that in priority
        recid = recordingid or current.metadata['musicbrainz_recordingid']
        if recid and mbid_validate(recid):
            if not tracks_cache:
                build_tracks_cache()
            tracknumber = current.metadata['tracknumber']
            discnumber = current.metadata['discnumber']
            lookup_keys = (
                (recid, tracknumber, discnumber),
                (recid, tracknumber),
                (recid, ),
            )
            cached = None
            for key in lookup_keys:
                cached = tracks_cache[key]
                if cached:
                    break
            if cached:
                yield (current, cached)
                continue

        # try to match by similarity
        def candidates():
            for album_track in self.tracks:
                score = album_track.metadata.compare(current.orig_metadata)
                yield SimMatchAlbum(similarity=score, track=album_track)
                # Runs when the consumer asks for the next candidate.
                QtCore.QCoreApplication.processEvents()

        no_match = SimMatchAlbum(similarity=-1, track=self.unmatched_files)
        best_match = find_best_match(candidates, no_match)
        below_threshold = best_match.similarity < threshold
        target = no_match.track if below_threshold else best_match.result.track
        yield (current, target)
def _match_files(self, files, recordingid=None, threshold=0):
    """Match files to tracks on this album, based on metadata similarity or recordingid.

    Generator yielding one `(file, target)` pair for every file whose state
    is not `File.REMOVED`. A valid recording id (the `recordingid` argument,
    else the file's own) is looked up in a cache of `self.tracks`, most
    specific key first. Files not resolved that way are paired with the
    most similar track, or with `self.unmatched_files` when the best
    similarity is below `threshold`.
    """
    SimMatchAlbum = namedtuple('SimMatchAlbum', 'similarity track')

    # Unknown keys read as None. Note that a defaultdict stores the default
    # on a missed lookup, so the mapping is non-empty after the first probe.
    tracks_cache = defaultdict(lambda: None)

    def build_tracks_cache():
        # Register every track under (recordingid, tracknumber, discnumber)
        # and under the two shorter prefixes of that key.
        for album_track in self.tracks:
            recording_id = album_track.orig_metadata['musicbrainz_recordingid']
            track_number = album_track.orig_metadata['tracknumber']
            disc_number = album_track.orig_metadata['discnumber']
            full_key = (recording_id, track_number, disc_number)
            for width in (3, 2, 1):
                tracks_cache[full_key[:width]] = album_track

    for current in list(files):
        if current.state == File.REMOVED:
            continue

        # if we have a recordingid to match against, use that in priority
        recid = recordingid or current.metadata['musicbrainz_recordingid']
        if recid and mbid_validate(recid):
            if not tracks_cache:
                build_tracks_cache()
            tracknumber = current.metadata['tracknumber']
            discnumber = current.metadata['discnumber']
            lookup_keys = (
                (recid, tracknumber, discnumber),
                (recid, tracknumber),
                (recid, ),
            )
            cached = None
            for key in lookup_keys:
                cached = tracks_cache[key]
                if cached:
                    break
            if cached:
                yield (current, cached)
                continue

        # try to match by similarity
        def candidates():
            for album_track in self.tracks:
                score = album_track.metadata.compare(current.orig_metadata)
                yield SimMatchAlbum(similarity=score, track=album_track)

        no_match = SimMatchAlbum(similarity=-1, track=self.unmatched_files)
        best_match = find_best_match(candidates, no_match)
        below_threshold = best_match.similarity < threshold
        target = no_match.track if below_threshold else best_match.result.track
        yield (current, target)
def _match_to_track(self, tracks, threshold=0):
    """Choose the entry of `tracks` that best matches this object's metadata.

    Each track is scored with `self.metadata.compare_to_track` (using
    `self.comparison_weights`) and the scores are handed to
    `find_best_match` together with a similarity -1 fallback.

    Returns None when the best similarity is below `threshold`. Otherwise
    returns `(track_id, release_group_id, release_id, node)`: the two
    release ids are set only when the match carries a release, and `node`
    is the matched track itself only when there is no release and the
    track has a 'title' key.
    """
    def scored_tracks():
        # multiple matches -- calculate similarities to each of them
        for candidate in tracks:
            yield self.metadata.compare_to_track(candidate, self.comparison_weights)

    fallback = SimMatchTrack(similarity=-1, releasegroup=None, release=None, track=None)
    best_match = find_best_match(scored_tracks, fallback)
    if best_match.similarity < threshold:
        return None

    winner = best_match.result
    matched_track = winner.track
    track_id = matched_track['id']

    release_group_id = release_id = node = None
    if winner.release:
        release_group_id = winner.releasegroup['id']
        release_id = winner.release['id']
    elif 'title' in matched_track:
        # No release attached: pass the track node itself along instead.
        node = matched_track
    return (track_id, release_group_id, release_id, node)