def distance(items, info):
    """Score how "significant" an album metadata change would be.

    The current artist and album of `items` are compared against the
    candidate `info` (indexed by 'artist', 'album' and 'tracks'), each
    item is compared against the candidate track at the same position,
    and the plugin-supplied distance is added in. Returns a float in
    [0.0,1.0]. The list of items must be ordered.
    """
    cur_artist, cur_album = current_metadata(items)

    # Running totals: the weighted penalty accrued so far and the
    # largest penalty that could have accrued. Their ratio is the
    # result.
    penalty = 0.0
    penalty_max = 0.0

    # Artist and album names; a missing current value compares as ''.
    penalty += string_dist(cur_artist or '', info['artist']) * ARTIST_WEIGHT
    penalty_max += ARTIST_WEIGHT
    penalty += string_dist(cur_album or '', info['album']) * ALBUM_WEIGHT
    penalty_max += ALBUM_WEIGHT

    # Per-track comparison; positions passed on are 1-based.
    pairs = zip(items, info['tracks'])
    for position, (item, track_data) in enumerate(pairs, 1):
        penalty += track_distance(item, track_data, position) * TRACK_WEIGHT
        penalty_max += TRACK_WEIGHT

    # Contribution from plugins: (distance, maximum distance).
    plugin_penalty, plugin_penalty_max = plugins.album_distance(items, info)
    penalty += plugin_penalty
    penalty_max += plugin_penalty_max

    # Guard against dividing by zero when nothing contributed.
    if penalty_max == 0.0:
        return 0.0
    return penalty / penalty_max
def distance(items, album_info, mapping):
    """Determines how "significant" an album metadata change would be.

    Returns a float in [0.0,1.0]. `album_info` is an AlbumInfo object
    reflecting the album to be compared. `items` is a sequence of all
    Item objects that will be matched (order is not important).
    `mapping` is a dictionary mapping Items to TrackInfo objects; the
    keys are a subset of `items` and the values are a subset of
    `album_info.tracks`.
    """
    cur_artist, cur_album, _ = current_metadata(items)
    cur_artist = cur_artist or ''
    cur_album = cur_album or ''

    # These accumulate the possible distance components. The final
    # distance will be dist / dist_max.
    dist = 0.0
    dist_max = 0.0

    # Artist/album metadata. The artist is not compared when
    # album_info.va is set.
    if not album_info.va:
        dist += string_dist(cur_artist, album_info.artist) * ARTIST_WEIGHT
        dist_max += ARTIST_WEIGHT
    dist += string_dist(cur_album, album_info.album) * ALBUM_WEIGHT
    dist_max += ALBUM_WEIGHT

    # Matched track distances. `.items()` (not the Python 2-only
    # `.iteritems()`) keeps this loop working on Python 3 as well.
    for item, track in mapping.items():
        dist += track_distance(item, track, album_info.va) * TRACK_WEIGHT
        dist_max += TRACK_WEIGHT

    # Extra and unmatched tracks: every candidate track with no item,
    # and every item with no candidate track, counts as a full penalty.
    for _ in set(album_info.tracks) - set(mapping.values()):
        dist += MISSING_WEIGHT
        dist_max += MISSING_WEIGHT
    for _ in set(items) - set(mapping):
        dist += UNMATCHED_WEIGHT
        dist_max += UNMATCHED_WEIGHT

    # Plugin distances: (distance, maximum distance).
    plugin_d, plugin_dm = plugins.album_distance(items, album_info, mapping)
    dist += plugin_d
    dist_max += plugin_dm

    # Normalize distance, avoiding divide-by-zero.
    if dist_max == 0.0:
        return 0.0
    return dist / dist_max
def distance(items, album_info, mapping):
    """Determines how "significant" an album metadata change would be.

    Returns a float in [0.0,1.0]. `album_info` is an AlbumInfo object
    reflecting the album to be compared. `items` is a sequence of all
    Item objects that will be matched (order is not important).
    `mapping` is a dictionary mapping Items to TrackInfo objects; the
    keys are a subset of `items` and the values are a subset of
    `album_info.tracks`.
    """
    cur_artist, cur_album, _ = current_metadata(items)
    cur_artist = cur_artist or ''
    cur_album = cur_album or ''

    # These accumulate the possible distance components. The final
    # distance will be dist / dist_max.
    dist = 0.0
    dist_max = 0.0

    # Artist/album metadata. The artist is not compared when
    # album_info.va is set.
    if not album_info.va:
        dist += string_dist(cur_artist, album_info.artist) * ARTIST_WEIGHT
        dist_max += ARTIST_WEIGHT
    dist += string_dist(cur_album, album_info.album) * ALBUM_WEIGHT
    dist_max += ALBUM_WEIGHT

    # Matched track distances. `.items()` (not the Python 2-only
    # `.iteritems()`) keeps this loop working on Python 3 as well.
    for item, track in mapping.items():
        dist += track_distance(item, track, album_info.va) * TRACK_WEIGHT
        dist_max += TRACK_WEIGHT

    # Extra and unmatched tracks: every candidate track with no item,
    # and every item with no candidate track, counts as a full penalty.
    for _ in set(album_info.tracks) - set(mapping.values()):
        dist += MISSING_WEIGHT
        dist_max += MISSING_WEIGHT
    for _ in set(items) - set(mapping):
        dist += UNMATCHED_WEIGHT
        dist_max += UNMATCHED_WEIGHT

    # Plugin distances: (distance, maximum distance).
    plugin_d, plugin_dm = plugins.album_distance(items, album_info, mapping)
    dist += plugin_d
    dist_max += plugin_dm

    # Normalize distance, avoiding divide-by-zero.
    if dist_max == 0.0:
        return 0.0
    return dist / dist_max
def distance(items, album_info):
    """Score how "significant" an album metadata change would be.

    `items` is walked in step with `album_info.tracks`; a falsy entry
    in `items` is charged as a missing track. Returns a float in
    [0.0,1.0]. The list of items must be ordered.
    """
    cur_artist, cur_album, _ = current_metadata(items)

    # Running totals: weighted penalty so far, and the largest penalty
    # that could have accrued. Their ratio is the result.
    penalty = 0.0
    penalty_max = 0.0

    # Artist (skipped when album_info.va is set) and album names; a
    # missing current value compares as ''.
    if not album_info.va:
        penalty += string_dist(cur_artist or '', album_info.artist) * \
            ARTIST_WEIGHT
        penalty_max += ARTIST_WEIGHT
    penalty += string_dist(cur_album or '', album_info.album) * ALBUM_WEIGHT
    penalty_max += ALBUM_WEIGHT

    # Per-track comparison; positions passed on are 1-based.
    pairs = zip(items, album_info.tracks)
    for position, (item, track_info) in enumerate(pairs, 1):
        if not item:
            # No item in this slot: full missing-track penalty.
            penalty += MISSING_WEIGHT
            penalty_max += MISSING_WEIGHT
            continue
        penalty += track_distance(item, track_info, position,
                                  album_info.va) * TRACK_WEIGHT
        penalty_max += TRACK_WEIGHT

    # Contribution from plugins: (distance, maximum distance).
    plugin_penalty, plugin_penalty_max = plugins.album_distance(items,
                                                                album_info)
    penalty += plugin_penalty
    penalty_max += plugin_penalty_max

    # Guard against dividing by zero when nothing contributed.
    if penalty_max == 0.0:
        return 0.0
    return penalty / penalty_max
def distance(items, album_info, mapping):
    """Determines how "significant" an album metadata change would be.

    Returns a Distance object (built with `hooks.Distance()`), which
    also gets a `tracks` attribute mapping each matched TrackInfo to
    its own track distance. `album_info` is an AlbumInfo object
    reflecting the album to be compared. `items` is a sequence of all
    Item objects that will be matched (order is not important).
    `mapping` is a dictionary mapping Items to TrackInfo objects; the
    keys are a subset of `items` and the values are a subset of
    `album_info.tracks`.
    """
    likelies, _ = current_metadata(items)

    dist = hooks.Distance()

    # Artist, if not various.
    if not album_info.va:
        dist.add_string('artist', likelies['artist'], album_info.artist)

    # Album.
    dist.add_string('album', likelies['album'], album_info.album)

    # Current or preferred media.
    if album_info.media:
        # Preferred media options. Each pattern may be preceded by a
        # count prefix such as "2x".
        patterns = config['match']['preferred']['media'].as_str_seq()
        options = [re.compile(r'(\d+x)?(%s)' % pat, re.I)
                   for pat in patterns]
        if options:
            dist.add_priority('media', album_info.media, options)
        # Current media.
        elif likelies['media']:
            dist.add_equality('media', album_info.media, likelies['media'])

    # Mediums.
    if likelies['disctotal'] and album_info.mediums:
        dist.add_number('mediums', likelies['disctotal'], album_info.mediums)

    # Prefer earliest release.
    if album_info.year and config['match']['preferred']['original_year']:
        # Assume 1889 (earliest first gramophone discs) if we don't know
        # the original year.
        original = album_info.original_year or 1889
        diff = abs(album_info.year - original)
        diff_max = abs(datetime.date.today().year - original)
        dist.add_ratio('year', diff, diff_max)
    # Year.
    elif likelies['year'] and album_info.year:
        if likelies['year'] in (album_info.year, album_info.original_year):
            # No penalty for matching release or original year.
            dist.add('year', 0.0)
        elif album_info.original_year:
            # Prefer matches closest to the release year.
            diff = abs(likelies['year'] - album_info.year)
            diff_max = abs(datetime.date.today().year -
                           album_info.original_year)
            dist.add_ratio('year', diff, diff_max)
        else:
            # Full penalty when there is no original year.
            dist.add('year', 1.0)

    # Preferred countries.
    patterns = config['match']['preferred']['countries'].as_str_seq()
    options = [re.compile(pat, re.I) for pat in patterns]
    if album_info.country and options:
        dist.add_priority('country', album_info.country, options)
    # Country.
    elif likelies['country'] and album_info.country:
        dist.add_string('country', likelies['country'], album_info.country)

    # Label.
    if likelies['label'] and album_info.label:
        dist.add_string('label', likelies['label'], album_info.label)

    # Catalog number.
    if likelies['catalognum'] and album_info.catalognum:
        dist.add_string('catalognum', likelies['catalognum'],
                        album_info.catalognum)

    # Disambiguation.
    if likelies['albumdisambig'] and album_info.albumdisambig:
        dist.add_string('albumdisambig', likelies['albumdisambig'],
                        album_info.albumdisambig)

    # Album ID.
    if likelies['mb_albumid']:
        dist.add_equality('album_id', likelies['mb_albumid'],
                          album_info.album_id)

    # Tracks. `.items()` (not the Python 2-only `.iteritems()`) keeps
    # this loop working on Python 3 as well.
    dist.tracks = {}
    for item, track in mapping.items():
        track_dist = track_distance(item, track, album_info.va)
        dist.tracks[track] = track_dist
        dist.add('tracks', track_dist.distance)

    # Missing tracks: one full penalty per candidate track left over
    # after the mapping.
    for _ in range(len(album_info.tracks) - len(mapping)):
        dist.add('missing_tracks', 1.0)

    # Unmatched tracks: one full penalty per item left over after the
    # mapping.
    for _ in range(len(items) - len(mapping)):
        dist.add('unmatched_tracks', 1.0)

    # Plugins.
    dist.update(plugins.album_distance(items, album_info, mapping))

    return dist
def distance(items, album_info, mapping):
    """Determines how "significant" an album metadata change would be.

    Returns a Distance object (built with `hooks.Distance()`), which
    also gets a `tracks` attribute mapping each matched TrackInfo to
    its own track distance. `album_info` is an AlbumInfo object
    reflecting the album to be compared. `items` is a sequence of all
    Item objects that will be matched (order is not important).
    `mapping` is a dictionary mapping Items to TrackInfo objects; the
    keys are a subset of `items` and the values are a subset of
    `album_info.tracks`.
    """
    likelies, _ = current_metadata(items)

    dist = hooks.Distance()

    # Artist, if not various.
    if not album_info.va:
        dist.add_string('artist', likelies['artist'], album_info.artist)

    # Album.
    dist.add_string('album', likelies['album'], album_info.album)

    # Current or preferred media.
    if album_info.media:
        # Preferred media options. Each pattern may be preceded by a
        # count prefix such as "2x".
        patterns = config['match']['preferred']['media'].as_str_seq()
        options = [re.compile(r'(\d+x)?(%s)' % pat, re.I)
                   for pat in patterns]
        if options:
            dist.add_priority('media', album_info.media, options)
        # Current media.
        elif likelies['media']:
            dist.add_equality('media', album_info.media, likelies['media'])

    # Mediums.
    if likelies['disctotal'] and album_info.mediums:
        dist.add_number('mediums', likelies['disctotal'], album_info.mediums)

    # Prefer earliest release.
    if album_info.year and config['match']['preferred']['original_year']:
        # Assume 1889 (earliest first gramophone discs) if we don't know
        # the original year.
        original = album_info.original_year or 1889
        diff = abs(album_info.year - original)
        diff_max = abs(datetime.date.today().year - original)
        dist.add_ratio('year', diff, diff_max)
    # Year.
    elif likelies['year'] and album_info.year:
        if likelies['year'] in (album_info.year, album_info.original_year):
            # No penalty for matching release or original year.
            dist.add('year', 0.0)
        elif album_info.original_year:
            # Prefer matches closest to the release year.
            diff = abs(likelies['year'] - album_info.year)
            diff_max = abs(datetime.date.today().year -
                           album_info.original_year)
            dist.add_ratio('year', diff, diff_max)
        else:
            # Full penalty when there is no original year.
            dist.add('year', 1.0)

    # Preferred countries.
    patterns = config['match']['preferred']['countries'].as_str_seq()
    options = [re.compile(pat, re.I) for pat in patterns]
    if album_info.country and options:
        dist.add_priority('country', album_info.country, options)
    # Country.
    elif likelies['country'] and album_info.country:
        dist.add_string('country', likelies['country'], album_info.country)

    # Label.
    if likelies['label'] and album_info.label:
        dist.add_string('label', likelies['label'], album_info.label)

    # Catalog number.
    if likelies['catalognum'] and album_info.catalognum:
        dist.add_string('catalognum', likelies['catalognum'],
                        album_info.catalognum)

    # Disambiguation.
    if likelies['albumdisambig'] and album_info.albumdisambig:
        dist.add_string('albumdisambig', likelies['albumdisambig'],
                        album_info.albumdisambig)

    # Album ID.
    if likelies['mb_albumid']:
        dist.add_equality('album_id', likelies['mb_albumid'],
                          album_info.album_id)

    # Tracks. `.items()` (not the Python 2-only `.iteritems()`) keeps
    # this loop working on Python 3 as well.
    dist.tracks = {}
    for item, track in mapping.items():
        track_dist = track_distance(item, track, album_info.va)
        dist.tracks[track] = track_dist
        dist.add('tracks', track_dist.distance)

    # Missing tracks: one full penalty per candidate track left over
    # after the mapping.
    for _ in range(len(album_info.tracks) - len(mapping)):
        dist.add('missing_tracks', 1.0)

    # Unmatched tracks: one full penalty per item left over after the
    # mapping.
    for _ in range(len(items) - len(mapping)):
        dist.add('unmatched_tracks', 1.0)

    # Plugins.
    dist.update(plugins.album_distance(items, album_info, mapping))

    return dist
def distance(items, album_info, mapping):
    """Determines how "significant" an album metadata change would be.

    Returns a float in [0.0,1.0]. `album_info` is an AlbumInfo object
    reflecting the album to be compared. `items` is a sequence of all
    Item objects that will be matched (order is not important).
    `mapping` is a dictionary mapping Items to TrackInfo objects; the
    keys are a subset of `items` and the values are a subset of
    `album_info.tracks`.
    """
    likelies, _ = current_metadata(items)

    # These accumulate the possible distance components. The final
    # distance will be dist / dist_max.
    dist = 0.0
    dist_max = 0.0

    # Artist/album metadata. The artist is not compared when
    # album_info.va is set.
    if not album_info.va:
        weight = weights['artist'].as_number()
        dist += string_dist(likelies['artist'], album_info.artist) * weight
        dist_max += weight
    weight = weights['album'].as_number()
    dist += string_dist(likelies['album'], album_info.album) * weight
    dist_max += weight

    # Year. No penalty for matching release or original year.
    if likelies['year'] and album_info.year:
        weight = weights['year'].as_number()
        if likelies['year'] not in (album_info.year,
                                    album_info.original_year):
            diff = abs(album_info.year - likelies['year'])
            if diff:
                # 0 for a one-year gap, approaching the full weight as
                # the gap widens.
                dist += (1.0 - 1.0 / diff) * weight
        dist_max += weight

    # Actual or preferred media: compare against the items' media, or
    # the configured preferred media when the items have none.
    if album_info.media:
        weight = weights['media'].as_number()
        compare_media = likelies['media'] or \
            config['match']['preferred_media'].get()
        if compare_media and \
                compare_media.lower() != album_info.media.lower():
            dist += weight
        dist_max += weight

    # MusicBrainz album ID.
    if likelies['mb_albumid']:
        weight = weights['album_id'].as_number()
        if likelies['mb_albumid'] != album_info.album_id:
            dist += weight
        dist_max += weight

    # Apply a small penalty for differences across many minor metadata.
    # This helps prioritise releases that are nearly identical.
    if likelies['disctotal']:
        weight = weights['minor'].as_number()
        if likelies['disctotal'] != album_info.mediums:
            dist += weight
        dist_max += weight
    # Each of these is only compared when both sides have a value.
    for field in ('label', 'catalognum', 'country', 'albumdisambig'):
        if likelies[field] and getattr(album_info, field):
            weight = weights['minor'].as_number()
            dist += string_dist(likelies[field],
                                getattr(album_info, field)) * weight
            dist_max += weight

    # Matched track distances. `.items()` (not the Python 2-only
    # `.iteritems()`) keeps this loop working on Python 3 as well.
    for item, track in mapping.items():
        weight = weights['track'].as_number()
        dist += track_distance(item, track, album_info.va) * weight
        dist_max += weight

    # Extra and unmatched tracks: every candidate track with no item,
    # and every item with no candidate track, counts as a full penalty.
    for _ in set(album_info.tracks) - set(mapping.values()):
        weight = weights['missing'].as_number()
        dist += weight
        dist_max += weight
    for _ in set(items) - set(mapping):
        weight = weights['unmatched'].as_number()
        dist += weight
        dist_max += weight

    # Plugin distances: (distance, maximum distance).
    plugin_d, plugin_dm = plugins.album_distance(items, album_info, mapping)
    dist += plugin_d
    dist_max += plugin_dm

    # Normalize distance, avoiding divide-by-zero.
    if dist_max == 0.0:
        return 0.0
    return dist / dist_max
def distance(items, album_info, mapping):
    """Determines how "significant" an album metadata change would be.

    Returns a float in [0.0,1.0]. `album_info` is an AlbumInfo object
    reflecting the album to be compared. `items` is a sequence of all
    Item objects that will be matched (order is not important).
    `mapping` is a dictionary mapping Items to TrackInfo objects; the
    keys are a subset of `items` and the values are a subset of
    `album_info.tracks`.
    """
    likelies, _ = current_metadata(items)

    # These accumulate the possible distance components. The final
    # distance will be dist / dist_max.
    dist = 0.0
    dist_max = 0.0

    # Artist/album metadata. The artist is not compared when
    # album_info.va is set.
    if not album_info.va:
        weight = weights['artist'].as_number()
        dist += string_dist(likelies['artist'], album_info.artist) * weight
        dist_max += weight
    weight = weights['album'].as_number()
    dist += string_dist(likelies['album'], album_info.album) * weight
    dist_max += weight

    # Year. No penalty for matching release or original year.
    if likelies['year'] and album_info.year:
        weight = weights['year'].as_number()
        if likelies['year'] not in (album_info.year,
                                    album_info.original_year):
            diff = abs(album_info.year - likelies['year'])
            if diff:
                # 0 for a one-year gap, approaching the full weight as
                # the gap widens.
                dist += (1.0 - 1.0 / diff) * weight
        dist_max += weight

    # Actual or preferred media: compare against the items' media when
    # they have one, otherwise against the configured preferred media.
    preferred_media = config['match']['preferred_media'].get()
    if likelies['media'] and album_info.media:
        weight = weights['media'].as_number()
        dist += string_dist(likelies['media'], album_info.media) * weight
        dist_max += weight
    elif album_info.media and preferred_media:
        weight = weights['media'].as_number()
        dist += string_dist(album_info.media, preferred_media) * weight
        dist_max += weight

    # MusicBrainz album ID.
    if likelies['mb_albumid']:
        weight = weights['album_id'].as_number()
        if likelies['mb_albumid'] != album_info.album_id:
            dist += weight
        dist_max += weight

    # Apply a small penalty for differences across many minor metadata.
    # This helps prioritise releases that are nearly identical.
    if likelies['disctotal']:
        weight = weights['minor'].as_number()
        if likelies['disctotal'] != album_info.mediums:
            dist += weight
        dist_max += weight
    # Each of these is only compared when both sides have a value.
    for field in ('label', 'catalognum', 'country', 'albumdisambig'):
        if likelies[field] and getattr(album_info, field):
            weight = weights['minor'].as_number()
            dist += string_dist(likelies[field],
                                getattr(album_info, field)) * weight
            dist_max += weight

    # Matched track distances. `.items()` (not the Python 2-only
    # `.iteritems()`) keeps this loop working on Python 3 as well.
    for item, track in mapping.items():
        weight = weights['track'].as_number()
        dist += track_distance(item, track, album_info.va) * weight
        dist_max += weight

    # Extra and unmatched tracks: every candidate track with no item,
    # and every item with no candidate track, counts as a full penalty.
    for _ in set(album_info.tracks) - set(mapping.values()):
        weight = weights['missing'].as_number()
        dist += weight
        dist_max += weight
    for _ in set(items) - set(mapping):
        weight = weights['unmatched'].as_number()
        dist += weight
        dist_max += weight

    # Plugin distances: (distance, maximum distance).
    plugin_d, plugin_dm = plugins.album_distance(items, album_info, mapping)
    dist += plugin_d
    dist_max += plugin_dm

    # Normalize distance, avoiding divide-by-zero.
    if dist_max == 0.0:
        return 0.0
    return dist / dist_max
def distance(items, album_info, mapping):
    """Score how "significant" an album metadata change would be.

    Builds and returns a Distance object (from `hooks.Distance()`); its
    `tracks` attribute maps each matched TrackInfo to that track's own
    distance. `album_info` is an AlbumInfo object reflecting the album
    to be compared. `items` is a sequence of all Item objects that will
    be matched (order is not important). `mapping` is a dictionary
    mapping Items to TrackInfo objects; the keys are a subset of
    `items` and the values are a subset of `album_info.tracks`.
    """
    likelies, consensus = current_metadata(items)
    result = hooks.Distance()

    # Artist is only compared for non-VA candidates; album always is.
    if not album_info.va:
        result.add_string('artist', likelies['artist'], album_info.artist)
    result.add_string('album', likelies['album'], album_info.album)

    # Media: rank against the configured preferences when there are
    # any, otherwise compare with the items' current media.
    if album_info.media:
        media_patterns = config['match']['preferred']['media'].as_str_seq()
        media_matchers = [
            re.compile(r'(\d+x)?(%s)' % pattern, re.I)
            for pattern in media_patterns
        ]
        if media_matchers:
            result.add_priority('media', album_info.media, media_matchers)
        elif likelies['media']:
            result.add_equality('media', album_info.media,
                                likelies['media'])

    # Number of mediums.
    if likelies['disctotal'] and album_info.mediums:
        result.add_number('mediums', likelies['disctotal'],
                          album_info.mediums)

    if album_info.year and config['match']['preferred']['original_year']:
        # Prefer the earliest release. When the original year is
        # unknown, fall back to 1889 (earliest first gramophone discs).
        baseline = album_info.original_year or 1889
        result.add_ratio(
            'year',
            abs(album_info.year - baseline),
            abs(datetime.date.today().year - baseline),
        )
    elif likelies['year'] and album_info.year:
        if likelies['year'] in (album_info.year, album_info.original_year):
            # Matching either the release or the original year is free.
            result.add('year', 0.0)
        elif album_info.original_year:
            # Prefer matches closest to the release year.
            result.add_ratio(
                'year',
                abs(likelies['year'] - album_info.year),
                abs(datetime.date.today().year - album_info.original_year),
            )
        else:
            # Without an original year the mismatch costs the maximum.
            result.add('year', 1.0)

    # Country: rank against the configured preferences when there are
    # any, otherwise compare with the items' current country.
    country_patterns = \
        config['match']['preferred']['countries'].as_str_seq()
    country_matchers = [re.compile(pattern, re.I)
                        for pattern in country_patterns]
    if album_info.country and country_matchers:
        result.add_priority('country', album_info.country,
                            country_matchers)
    elif likelies['country'] and album_info.country:
        result.add_string('country', likelies['country'],
                          album_info.country)

    # Label, catalog number and disambiguation are each compared only
    # when both sides have a value.
    for field in ('label', 'catalognum', 'albumdisambig'):
        if likelies[field] and getattr(album_info, field):
            result.add_string(field, likelies[field],
                              getattr(album_info, field))

    # Album ID.
    if likelies['mb_albumid']:
        result.add_equality('album_id', likelies['mb_albumid'],
                            album_info.album_id)

    # Per-track distances for every matched pair.
    result.tracks = {}
    for item, track in mapping.items():
        per_track = track_distance(item, track, album_info.va)
        result.tracks[track] = per_track
        result.add('tracks', per_track.distance)

    # Track totals: remember the first non-zero total seen for each
    # medium, plus the total across all mediums.
    medium_totals = {}
    for track in album_info.tracks:
        if track.medium_total:
            medium_totals.setdefault(track.medium, track.medium_total)
    medium_all_total = sum(medium_totals.values())

    # When there is a consensus and the likely track total equals the
    # all-disc total, nothing more needs checking.
    # NOTE(review): `consensus` is tested for truthiness as a whole --
    # confirm that is intended rather than a per-field lookup.
    totals_agree = (consensus and likelies['tracktotal'] and
                    likelies['tracktotal'] == medium_all_total)
    if not totals_agree:
        # The imported tracks may or may not be tagged with per-disc
        # numbering, so both the per-medium and the all-disc totals are
        # considered.
        def disc_of(entry):
            return entry.disc or 0

        ordered = sorted(mapping.keys(), key=disc_of)
        for disc, members in groupby(ordered, disc_of):
            medium_total = medium_totals.get(disc, 0)
            seen_totals = set(
                member.tracktotal for member in members
                if member.tracktotal
            )
            for seen_total in seen_totals:
                if seen_total != medium_all_total and medium_total:
                    result.add_number('tracktotal', medium_total,
                                      seen_total)

    # One full penalty per candidate track left over after the mapping.
    for _ in range(len(album_info.tracks) - len(mapping)):
        result.add('missing_tracks', 1.0)

    # One full penalty per item left over after the mapping.
    for _ in range(len(items) - len(mapping)):
        result.add('unmatched_tracks', 1.0)

    # Plugins.
    result.update(plugins.album_distance(items, album_info, mapping))

    return result