def correct_album(self, album, artist=None):
    """Look up the canonical album name online and rename it in the database.

    The album is searched as given first and, if that raises
    ``NotFoundOnline``, once more after ``improve_encoding``.  When the
    online name differs from ``album``, every stored album whose
    ``normalcase`` form equals that of ``album`` is renamed to it.

    Args:
        album: Album name to correct.
        artist: Optional artist name forwarded to the online lookup.

    Returns:
        The online album name, or ``album`` unchanged when the fallback
        lookup fails as well.

    Note:
        The re-query with the decoded name is not guarded, so whatever it
        raises (presumably ``NotFoundOnline``) propagates to the caller.
    """
    logger.debug('in correct_album({})'.format(album))
    try:
        improved = self.online.generic('album', album, artist=artist).name
    except NotFoundOnline:
        try:
            improved = self.online.generic(
                'album', improve_encoding(album), artist=artist).name
        except Exception:
            # Best effort: keep the caller's name when the fallback fails,
            # but let KeyboardInterrupt / SystemExit through.
            return album
    if improved == album:
        return improved

    decoded = improve_encoding(improved)
    if improved != decoded:
        # The online name itself looks mis-encoded: query again for the
        # decoded *album* (this used to be looked up as an artist).
        improved = self.online.generic('album', decoded, artist=artist).name
    print('REPLACING', album, 'WITH', improved)

    rows = self.sql.execute('select distinct album from songs').fetchall()
    query = normalcase(album)
    for stored, in rows:
        if not stored or normalcase(stored) != query:
            continue
        if stored == improved:
            # Already carries the corrected name; running the delete below
            # would wipe exactly the rows we want to keep.
            continue
        self.sql.execute(
            'update or ignore songs set album=? where album=?',
            (improved, stored))
        # Rows the update skipped because of a conflict are duplicates.
        self.sql.execute('delete from songs where album=?', (stored, ))
    self.sql.commit()
    return improved
def test_brainz_album(self):
    """Album lookup through the BRAINZ provider yields name, artist and track count."""
    title = 'Mittelpunkt der Welt'
    found = self.data.album(onlinedata.BRAINZ, title)
    expected_name = normalcase(title)
    expected_artist = normalcase('Element of Crime')
    self.assertEqual(normalcase(found.name), expected_name)
    self.assertEqual(normalcase(found.artist), expected_artist)
    track_total = len(found.tracks())
    self.assertEqual(track_total, 10)
def correct_artist(self, artist, force=False):
    """Look up the canonical artist name online and rename it in the database.

    Unless ``force`` is true, the lookup is skipped when the first stored
    row for ``artist`` already has ``artist_as_online`` set.  Otherwise the
    artist is searched as given and, if that raises ``NotFoundOnline``,
    once more after ``improve_encoding``.  Every stored artist whose
    ``normalcase`` form equals that of ``artist`` and whose spelling
    differs from the online name is then renamed.

    Args:
        artist: Artist name to correct.
        force: Query online even if the artist is already flagged as
            matching the online spelling.

    Returns:
        The online artist name, or ``artist`` unchanged when it is already
        flagged or when the fallback lookup fails.

    Note:
        The re-query with the decoded name is not guarded, so whatever it
        raises (presumably ``NotFoundOnline``) propagates to the caller.
    """
    logger.debug('in correct_artist({})'.format(artist))
    if not force:
        already = self.sql.execute(
            'select artist_as_online from songs where artist=?',
            (artist, )).fetchone()
        if already and already[0]:
            return artist
    try:
        improved = self.online.generic('artist', artist).name
    except NotFoundOnline:
        try:
            improved = self.online.generic(
                'artist', improve_encoding(artist)).name
        except Exception:
            # Best effort: keep the caller's name when the fallback fails,
            # but let KeyboardInterrupt / SystemExit through.
            return artist
    else:
        # Direct hit: the online name itself may be mis-encoded, in which
        # case the decoded spelling is looked up instead.
        decoded = improve_encoding(improved)
        if improved != decoded:
            improved = self.online.generic('artist', decoded).name

    rows = self.sql.execute('select distinct artist from songs').fetchall()
    query = normalcase(artist)
    for stored, in rows:
        if not stored or stored == improved:
            # Skipping the corrected spelling keeps the delete below from
            # removing the rows we want to end up with.
            continue
        if normalcase(stored) != query:
            continue
        print('REPLACING', stored, 'WITH', improved)
        self.sql.execute(
            'update or ignore songs set artist=?, artist_as_online=1, actual=0 where artist=?',
            (improved, stored))
        # Rows the update skipped because of a conflict are duplicates.
        self.sql.execute('delete from songs where artist=?', (stored, ))
    self.sql.commit()
    return improved
def generic_correction(self, what):
    """Normalise spelling of every not-yet-verified album, artist or title.

    Values of the chosen column whose ``<column>_as_online`` flag is 0 are
    passed through ``improve_encoding`` and have runs of spaces collapsed.
    Spellings that share a ``normalcase`` form with another spelling, or
    that are entirely upper or lower case, are looked up online; anything
    else (or anything not found online) keeps its cleaned spelling.  The
    table is rewritten accordingly and the flag is set on all rows.

    Args:
        what: One of ``'album'``, ``'artist'`` or ``'track'``; ``'track'``
            operates on the ``title`` column.
    """
    assert (what in ['album', 'artist', 'track'])
    field = 'title' if what == 'track' else what

    rows = self.sql.execute(
        'select distinct {} from songs where {}_as_online=0'.format(
            field, field))
    # stored spelling -> cleaned spelling
    cleaned = {
        raw: re.sub(' +', ' ', improve_encoding(raw))
        for raw, in rows
    }

    # normalised key -> every cleaned spelling that maps to it
    variants = defaultdict(list)
    for value in cleaned.values():
        variants[normalcase(value)].append(value)

    canonical = {}
    for key, spellings in variants.items():
        first = spellings[0]
        if (len(spellings) <= 1 and not first.isupper()
                and not first.islower()):
            # Single mixed-case spelling: accept it without a lookup.
            canonical[key] = first
            continue
        artist = ''
        if field != 'artist':
            artist = self.sql.execute(
                'select artist from songs where {}=?'.format(field),
                (first, )).fetchone()
            if artist:
                artist = artist[0]
        try:
            canonical[key] = self.online.generic(
                what, key, artist=artist).name
        except NotFoundOnline:
            canonical[key] = first

    replacements = {
        raw: canonical[normalcase(value)]
        for raw, value in cleaned.items()
    }
    for old, new in replacements.items():
        if old == new:
            continue
        print('REPLACING', old, 'WITH', new)
        self.sql.execute(
            'update or ignore songs set {}=?, actual=0 where {}=?'.
            format(field, field), (new, old))
        # Rows the update skipped because of a conflict are removed.
        self.sql.execute('delete from songs where {}=?'.format(field),
                         (old, ))
    self.sql.execute('update songs set {}_as_online=1'.format(field))
    self.sql.commit()
def fetch_tracks_for_artist(self, artist, count=15):
    """Add up to ``count`` (capped at 100) online tracks of ``artist`` as file-less songs.

    Tracks whose normalised name equals, or contains, the normalised title
    of a song already stored for the artist are skipped.  If any remaining
    suggestion carries an album, only suggestions with an album are used.
    The number of rows actually added is printed.

    Args:
        artist: Artist name; corrected through ``correct_artist`` first.
        count: Maximum number of tracks to insert.
    """
    count_query = 'select count (*) from songs'
    rows_before = self.sql.execute(count_query).fetchone()[0]
    limit = min(count, 100)

    try:
        artist = self.correct_artist(artist)
    except NotFoundOnline:
        print('0 songs added')
        return

    title_rows = self.sql.execute(
        'select title from songs where artist=?', (artist, ))
    known_tracks = {normalcase(title) for title, in title_rows}

    fetched_artist = self.online.artist(onlinedata.LASTFM, artist)
    candidates = list(fetched_artist.tracks()) if fetched_artist else []

    if known_tracks:
        def is_unknown(track):
            name = normalcase(track.name)
            if name in known_tracks:
                return False
            return not any(known in name for known in known_tracks)

        candidates = [track for track in candidates if is_unknown(track)]

    with_album = [track for track in candidates if track.album]
    if with_album:
        candidates = with_album

    for song in candidates[:limit]:
        # Songs without a file still get a filename: a random placeholder.
        placeholder = 'NOFILE' + ''.join(
            random.choice(string.hexdigits) for _ in range(16))
        self.sql.execute(
            'insert or ignore into songs'
            ' (artist, title, album, track, filename, has_file)'
            ' values (?, ?, ?, ?, ?, 0)',
            (artist, song.name, song.album, song.track, placeholder))
    self.sql.commit()

    rows_after = self.sql.execute(count_query).fetchone()[0]
    print(rows_after - rows_before, 'songs added')
def _search_album(self, provider, title, artist='', tracks=None, min_tracks=0):
    """Search one online provider for an album and return the first acceptable hit.

    A result is rejected when its artist or title differs too much from
    the request (``diff`` above 0.4), when it has fewer than ``min_tracks``
    tracks, or when any entry of ``tracks`` is missing from it.

    Args:
        provider: Index selecting the backend: 0 uses the Last.fm search
            and ``LastAlbum``, 1 uses the ``brainz`` search and
            ``BrainzAlbum``.
        title: Album title to search for.
        artist: Optional artist name; when given, fewer results are
            reviewed (5 instead of 40).
        tracks: Optional track names that must all be present.  They are
            compared against ``normalcase``-d names, so they are expected
            to be normalised already.
        min_tracks: Minimum number of tracks the album must have; 0
            disables the check.

    Returns:
        The first matching album wrapper, or ``None`` when the search
        fails, returns nothing, or no result passes the checks.
    """
    logger.info('In _search_album(provider={}, title={}, artist={})'.format(
        provider, title, artist))
    results_to_review = 5 if artist else 40

    def searchlast():
        if artist:
            top_albums = self.artist(LASTFM, artist)._link.get_top_albums()
            return [i.item for i in top_albums]
        search_results = self.lastfm.search_for_album(title)
        # Two consecutive pages of results.
        return search_results.get_next_page() + search_results.get_next_page()

    def searchbrainz():
        return brainz.search_releases(title, artist=artist)['release-list']

    search = [searchlast, searchbrainz][provider]
    album_cls = [LastAlbum, BrainzAlbum][provider]

    try:
        output = search()
    except Exception as exc:
        # Provider failures are logged, not raised: callers fall back to
        # another provider when None is returned.
        logger.critical('Exception in search')
        logger.exception(exc)
        return None
    if not output:
        return None

    for attempt, result in zip(range(results_to_review), output):
        logger.info('Album: attempt #{}'.format(attempt + 1))
        album = album_cls(result)
        if artist and diff(album.artist, artist) > 0.4:
            logger.info('Omitting because {} != {}'.format(album.artist, artist))
            continue
        if diff(album.name, title) > 0.4:
            logger.info('Omitting because of title: {}'.format(album.name))
            continue
        if min_tracks:
            track_count = len(album.tracks())
            if track_count < min_tracks:
                logger.info('Omitting because of min_tracks: only {}'.format(
                    track_count))
                continue
        if tracks:
            album_tracks = [normalcase(i.name) for i in album.tracks()]
            missing = [known for known in tracks if known not in album_tracks]
            if missing:
                logger.info('Omitting because track not found')
                for known in missing:
                    logger.debug(known + ' not found in fetched')
                continue
        return album
    return None
def merge_artists(self):
    """Rename artists that have near-duplicate spellings to their online name.

    Stored artist names are normalised with ``normalcase`` and compared
    pairwise; a pair counts as similar when its Levenshtein similarity
    exceeds 0.9.  Each normalised name with at least one similar
    counterpart is looked up online, and every stored spelling that
    normalises to it is renamed to the online name.  Artists that cannot
    be found online are left untouched.
    """
    rows = self.sql.execute('select distinct artist from songs').fetchall()
    artists = {raw: normalcase(raw) for raw, in rows if raw}

    # Every name matches itself (adding 2), so a count above 2 means at
    # least one other stored spelling is similar.
    matches = {norm: 0 for norm in artists.values()}
    for left in artists.values():
        for right in artists.values():
            longest = max(len(left), len(right))
            if longest:
                similarity = 1.0 - levenshtein(left, right) / longest
            else:
                # Two empty normalised names are identical; avoid 0/0.
                similarity = 1.0
            if similarity > 0.9:
                matches[left] += 1
                matches[right] += 1
    to_correct = [norm for norm, hits in matches.items() if hits > 2]

    for artist in to_correct:
        try:
            # .name: the SQL parameter must be the string, not the
            # lookup result object.
            improved = self.online.generic('artist', artist).name
        except NotFoundOnline:
            # One unknown artist must not abort the whole merge.
            continue
        print('REPLACING', artist, 'WITH', improved)
        for stored, norm in artists.items():
            if norm != artist or stored == improved:
                # Rows already carrying the corrected name are skipped,
                # otherwise the delete below would remove them.
                continue
            self.sql.execute(
                'update or ignore songs set artist=?, actual=0 where artist=?',
                (improved, stored))
            # Rows the update skipped because of a conflict are duplicates.
            self.sql.execute('delete from songs where artist=?', (stored, ))
    self.sql.commit()
def test_brainz_artist(self):
    """Artist lookup through the BRAINZ provider yields the name; tracks() is not implemented."""
    wanted = "Веня Д'ркин"
    found = self.data.artist(onlinedata.BRAINZ, wanted)
    self.assertEqual(normalcase(found.name), normalcase(wanted))
    # Callable form of assertRaises instead of the context manager.
    self.assertRaises(NotImplementedError, found.tracks)
def test_brainz_song(self):
    """Song lookup through the BRAINZ provider yields the expected title and artist."""
    wanted_title = 'Алмазный британец'
    found = self.data.song(onlinedata.BRAINZ, wanted_title)
    expected_name = normalcase(wanted_title)
    expected_artist = normalcase('Ночные Снайперы')
    self.assertEqual(normalcase(found.name), expected_name)
    self.assertEqual(normalcase(found.artist), expected_artist)
def test_lastfm_artist(self):
    """Artist lookup through the LASTFM provider yields the name and two known tracks."""
    wanted = "Веня Д'ркин"
    found = self.data.artist(onlinedata.LASTFM, wanted)
    self.assertEqual(normalcase(found.name), normalcase(wanted))
    track_names = [track.name for track in found.tracks()]
    for expected in ('Маргарита', 'Кошка'):
        self.assertIn(expected, track_names)
def test_lastfm_album(self):
    """Album lookup through the LASTFM provider yields name, artist and track count."""
    title = 'Wish You Were Here'
    found = self.data.album(onlinedata.LASTFM, title)
    expected_name = normalcase(title)
    expected_artist = normalcase('Pink Floyd')
    self.assertEqual(normalcase(found.name), expected_name)
    self.assertEqual(normalcase(found.artist), expected_artist)
    track_total = len(found.tracks())
    self.assertEqual(track_total, 5)
def test_lastfm_song(self):
    """Song lookup through the LASTFM provider yields the expected title and artist."""
    wanted_title = 'Алмазный британец'
    found = self.data.song(onlinedata.LASTFM, wanted_title)
    expected_name = normalcase(wanted_title)
    expected_artist = normalcase('Ночные Снайперы')
    self.assertEqual(normalcase(found.name), expected_name)
    self.assertEqual(normalcase(found.artist), expected_artist)
def fill_album(self, artist, albumname, only_correct=False):
    """Align the stored songs of an album with its online track list.

    The album is searched on the BRAINZ provider first and the LASTFM
    provider second, constrained by the tracks already stored for it.  If
    neither finds it, the album name is corrected with ``correct_album``
    and the search is repeated without track constraints.  Stored songs
    that match an online track by normalised title get the online track
    number and title; online tracks with no stored counterpart are
    inserted as file-less songs unless ``only_correct`` is set.

    Args:
        artist: Artist name (corrected through ``correct_artist``); when
            empty, the artist of the album found online is used.
        albumname: Album name as stored.
        only_correct: If true, only fix existing rows and insert nothing.

    Returns:
        None.  Returns early without changes when the artist or the album
        cannot be found online.
    """
    logger.debug('in fill_album({}, {})'.format(artist, albumname))
    known_tracks = []
    tracknames = []
    min_tracks = 0

    if artist:
        try:
            artist = self.correct_artist(artist)
        except NotFoundOnline:
            return
        rows = self.sql.execute(
            'select track, title from songs where artist=? and album=?',
            (artist, albumname)).fetchall()
        # (track number, stored title, normalised title), ordered by number
        known_tracks = sorted(
            (number, title, normalcase(title)) for number, title in rows)
        if known_tracks:
            highest_number = known_tracks[-1][0]
            min_tracks = max(highest_number, len(known_tracks))
            tracknames = [entry[2] for entry in known_tracks]

    album = self.online.album(onlinedata.BRAINZ, albumname, artist,
                              tracknames, min_tracks)
    if not album:
        album = self.online.album(onlinedata.LASTFM, albumname, artist,
                                  tracknames, min_tracks)
    if not album:
        try:
            albumname = self.correct_album(albumname, artist=artist)
        except NotFoundOnline:
            print(albumname, 'not found online')
            return
        album = self.online.album(onlinedata.BRAINZ, albumname, artist)
        if not album:
            album = self.online.album(onlinedata.LASTFM, albumname, artist)
    if not album:
        return
    if not artist:
        artist = album.artist

    for index, track in enumerate(album.tracks()):
        position = index + 1
        # First stored song whose normalised title equals this track's.
        match = next(
            (entry for entry in known_tracks
             if entry[2] == normalcase(track.name)),
            None)

        if match is None:
            if only_correct:
                continue
            # Songs without a file still get a filename: a random placeholder.
            placeholder = 'NOFILE' + ''.join(
                random.choice(string.hexdigits) for _ in range(16))
            try:
                self.sql.execute(
                    'insert into songs (track, artist, album, '
                    ' title, filename, has_file) '
                    'values (?, ?, ?, ?, ?, 0) ',
                    (position, artist, albumname, track.name, placeholder))
            except sqlite3.IntegrityError:
                print('Can not insert: artist ', artist, 'album ',
                      albumname, 'title', track.name, 'track', index)
            continue

        idx, song, _ = match
        number_differs = position != idx
        title_differs = song != track.name
        if not (number_differs or title_differs):
            continue
        if number_differs:
            print('Song', song, ': ', idx, ' -> ', position)
        if title_differs:
            print('REPLACING', song, 'WITH', track.name)
        try:
            self.sql.execute(
                'update songs set track=?, title=?, '
                ' artist_as_online=1, album_as_online=1, title_as_online=1, actual=0'
                ' where title=? and artist=? and album=?',
                (position, track.name, song, artist, albumname))
        except sqlite3.IntegrityError:
            # The corrected row collides with an existing one: drop the
            # stale row instead.
            self.sql.execute(
                'delete from songs where track=? and '
                ' title=? and artist=? and album=?',
                (idx, song, artist, albumname))
    self.sql.commit()