def getHybridRelease(fullreleaselist):
    """
    Return the best group of tracks from the list of releases together with
    the earliest release date.

    fullreleaselist -- list of release dicts carrying 'AlbumASIN', 'Tracks',
                       'ReleaseID', 'ReleaseDate', 'Format' and 'Country'.

    Returns a dict with keys 'ReleaseDate' (earliest date across all
    releases), 'Tracks' and 'AlbumASIN' (both from the preferred release).

    Raises Exception when called with an empty list.
    """
    if not fullreleaselist:
        raise Exception("getHybridRelease was called with an empty fullreleaselist")

    # Preference ranks: lower sorts first. Hoisted out of the loop since they
    # are constant for every release; unknown values fall back to 3 (last).
    format_ranks = {"2xVinyl": 2, "Vinyl": 2, "CD": 0, "Cassette": 3,
                    "2xCD": 1, "Digital Media": 0}
    country_ranks = {"US": 0, "GB": 1, "JP": 2}

    sortable_release_list = []

    for release in fullreleaselist:
        # Narrowed from a bare except: only a missing/unknown key should
        # fall back to the default rank.
        try:
            format_rank = format_ranks[release["Format"]]
        except KeyError:
            format_rank = 3
        try:
            country_rank = country_ranks[release["Country"]]
        except KeyError:
            country_rank = 3

        release_dict = {
            "hasasin": bool(release["AlbumASIN"]),
            "asin": release["AlbumASIN"],
            "trackscount": len(release["Tracks"]),
            "releaseid": release["ReleaseID"],
            "releasedate": release["ReleaseDate"],
            "format": format_rank,
            "country": country_rank,
            "tracks": release["Tracks"],
        }
        sortable_release_list.append(release_dict)

    # necessary to make dates that miss the month and/or day show up after full dates
    def getSortableReleaseDate(releaseDate):
        if releaseDate is None:
            # change this value to change the sorting behaviour of None:
            # returning 'None' puts it at the top, which was the normal
            # behaviour for pre-ngs versions
            return "None"
        if releaseDate.count("-") == 2:
            # full YYYY-MM-DD date
            return releaseDate
        if releaseDate.count("-") == 1:
            # YYYY-MM only: pad with an impossible day so it sorts last
            return releaseDate + "32"
        # YYYY only: pad with an impossible month and day
        return releaseDate + "13-32"

    sortable_release_list.sort(key=lambda x: getSortableReleaseDate(x["releasedate"]))

    # Prefer the release whose track count is closest to the average.
    average_tracks = sum(x["trackscount"] for x in sortable_release_list) / float(len(sortable_release_list))
    for item in sortable_release_list:
        item["trackscount_delta"] = abs(average_tracks - item["trackscount"])

    a = helpers.multikeysort(sortable_release_list, ["-hasasin", "country", "format", "trackscount_delta"])

    # Earliest date comes from the date-sorted list; tracks/ASIN come from
    # the best-ranked release.
    release_dict = {
        "ReleaseDate": sortable_release_list[0]["releasedate"],
        "Tracks": a[0]["tracks"],
        "AlbumASIN": a[0]["asin"],
    }

    return release_dict
for track in releaseResult.tracks: tracks.append({ 'number': i, 'title': track.title, 'id': u.extractUuid(track.id), 'url': track.id, 'duration': track.duration }) i += 1 release_dict['tracks'] = tracks releaselist.append(release_dict) a = multikeysort(releaselist, ['-hasasin', 'country', 'format', 'trackscount']) release_dict = {'releaseid' :a[0]['releaseid'], 'releasedate' : releaselist[0]['releasedate'], 'trackcount' : a[0]['trackscount'], 'tracks' : a[0]['tracks'], 'asin' : a[0]['asin'], 'releaselist' : releaselist } return release_dict def getRelease(releaseid): """ Deep release search to get track info """
def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None,
                cron=False, artistScan=False):
    """Scan a music directory and match files to tracks in the database.

    Walks ``dir`` (falls back to the configured music directory), reads tags
    from every media file found and records them in the ``have`` table, then
    matches each new/modified file against the ``tracks``/``alltracks``
    tables via the CleanName index, and finally updates the per-artist
    "have tracks" counts and imports/queues artists not yet in the db.

    dir        -- directory to scan; encoded to a bytestring below unless it
                  comes from the post processor (``append``)
    append     -- True when called from the post processor for a new album
    ArtistID   -- restrict the stale-location cleanup to this artist
    ArtistName -- restrict the ``have`` cleanup to this artist
    cron       -- True when triggered by the scheduler; honours the
                  LIBRARYSCAN config switch
    artistScan -- True when rescanning a single artist

    Returns None; all results are written to the database.
    """
    # Scheduled scans can be turned off in the config.
    if cron and not headphones.CONFIG.LIBRARYSCAN:
        return

    if not dir:
        if not headphones.CONFIG.MUSIC_DIR:
            return
        else:
            dir = headphones.CONFIG.MUSIC_DIR

    # If we're appending a dir, it's coming from the post processor which is
    # already bytestring
    if not append or artistScan:
        dir = dir.encode(headphones.SYS_ENCODING)

    if not os.path.isdir(dir):
        logger.warn('Cannot find directory: %s. Not scanning' %
                    dir.decode(headphones.SYS_ENCODING, 'replace'))
        return

    myDB = db.DBConnection()
    new_artists = []

    logger.info('Scanning music directory: %s' %
                dir.decode(headphones.SYS_ENCODING, 'replace'))

    if not append:
        # Clean up bad filepaths. Queries can take some time, ensure all
        # results are loaded before processing
        if ArtistID:
            tracks = myDB.action(
                'SELECT Location FROM alltracks WHERE ArtistID = ? AND Location IS NOT NULL UNION SELECT Location FROM tracks WHERE ArtistID = ? AND Location '
                'IS NOT NULL', [ArtistID, ArtistID])
        else:
            tracks = myDB.action(
                'SELECT Location FROM alltracks WHERE Location IS NOT NULL UNION SELECT Location FROM tracks WHERE Location IS NOT NULL')

        locations = []
        for track in tracks:
            locations.append(track['Location'])
        for location in locations:
            encoded_track_string = location.encode(headphones.SYS_ENCODING, 'replace')
            if not os.path.isfile(encoded_track_string):
                # File no longer on disk: clear its location/quality columns.
                myDB.action(
                    'UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE Location=?',
                    [None, None, None, location])
                myDB.action(
                    'UPDATE alltracks SET Location=?, BitRate=?, Format=? WHERE Location=?',
                    [None, None, None, location])

        if ArtistName:
            del_have_tracks = myDB.select(
                'SELECT Location, Matched, ArtistName FROM have WHERE ArtistName = ? COLLATE NOCASE',
                [ArtistName])
        else:
            del_have_tracks = myDB.select(
                'SELECT Location, Matched, ArtistName FROM have')

        locations = []
        for track in del_have_tracks:
            locations.append([track['Location'], track['ArtistName']])
        for location in locations:
            encoded_track_string = location[0].encode(headphones.SYS_ENCODING, 'replace')
            if not os.path.isfile(encoded_track_string):
                if location[1]:
                    # Make sure deleted files get accounted for when updating
                    # artist track counts
                    new_artists.append(location[1])
                myDB.action('DELETE FROM have WHERE Location=?', [location[0]])
                logger.info(
                    'File %s removed from Headphones, as it is no longer on disk' %
                    encoded_track_string.decode(headphones.SYS_ENCODING, 'replace'))

    bitrates = []
    song_list = []
    # Running list of subdirectories seen, used to log progress once per
    # subdirectory change.
    latest_subdirectory = []

    new_song_count = 0
    file_count = 0

    for r, d, f in helpers.walk_directory(dir):
        # Filter paths based on config. Note that these methods work directly
        # on the inputs
        helpers.path_filter_patterns(d, headphones.CONFIG.IGNORED_FOLDERS, r)
        helpers.path_filter_patterns(f, headphones.CONFIG.IGNORED_FILES, r)

        for files in f:
            # MEDIA_FORMATS = music file extensions, e.g. mp3, flac, etc
            if any(files.lower().endswith('.' + x.lower())
                   for x in headphones.MEDIA_FORMATS):
                subdirectory = r.replace(dir, '')
                latest_subdirectory.append(subdirectory)

                # Log only when entering a new subdirectory to keep the log
                # readable on large libraries.
                if file_count == 0 and r.replace(dir, '') != '':
                    logger.info("[%s] Now scanning subdirectory %s" %
                                (dir.decode(headphones.SYS_ENCODING, 'replace'),
                                 subdirectory.decode(headphones.SYS_ENCODING, 'replace')))
                elif latest_subdirectory[file_count] != latest_subdirectory[
                        file_count - 1] and file_count != 0:
                    logger.info("[%s] Now scanning subdirectory %s" %
                                (dir.decode(headphones.SYS_ENCODING, 'replace'),
                                 subdirectory.decode(headphones.SYS_ENCODING, 'replace')))

                song = os.path.join(r, files)

                # We need the unicode path to use for logging, inserting into
                # database
                unicode_song_path = song.decode(headphones.SYS_ENCODING, 'replace')

                # Try to read the metadata
                # NOTE(review): this rebinds the walk loop variable 'f' to a
                # MediaFile; the inner 'for files in f' iterator is unaffected,
                # but 'f' no longer names the file list after this point.
                try:
                    f = MediaFile(song)
                except (FileTypeError, UnreadableFileError):
                    logger.warning(
                        "Cannot read media file '%s', skipping. It may be corrupted or not a media file.",
                        unicode_song_path)
                    continue
                except IOError:
                    logger.warning(
                        "Cannnot read media file '%s', skipping. Does the file exists?",
                        unicode_song_path)
                    continue

                # Grab the bitrates for the auto detect bit rate option
                if f.bitrate:
                    bitrates.append(f.bitrate)

                # Use the album artist over the artist if available
                if f.albumartist:
                    f_artist = f.albumartist
                elif f.artist:
                    f_artist = f.artist
                else:
                    f_artist = None

                # Add the song to our song list -
                # TODO: skip adding songs without the minimum requisite
                # information (just a matter of putting together the right if
                # statements)
                if f_artist and f.album and f.title:
                    CleanName = helpers.clean_name(f_artist + ' ' + f.album + ' ' + f.title)
                else:
                    CleanName = None

                controlValueDict = {'Location': unicode_song_path}
                newValueDict = {
                    'TrackID': f.mb_trackid,
                    # 'ReleaseID' : f.mb_albumid,
                    'ArtistName': f_artist,
                    'AlbumTitle': f.album,
                    'TrackNumber': f.track,
                    'TrackLength': f.length,
                    'Genre': f.genre,
                    'Date': f.date,
                    'TrackTitle': f.title,
                    'BitRate': f.bitrate,
                    'Format': f.format,
                    'CleanName': CleanName
                }

                check_exist_song = myDB.action(
                    "SELECT * FROM have WHERE Location=?",
                    [unicode_song_path]).fetchone()

                # Only attempt to match songs that are new, haven't yet been
                # matched, or metadata has changed.
                if not check_exist_song:
                    # This is a new track
                    if f_artist:
                        new_artists.append(f_artist)
                    myDB.upsert("have", newValueDict, controlValueDict)
                    new_song_count += 1
                else:
                    if check_exist_song['ArtistName'] != f_artist or \
                            check_exist_song['AlbumTitle'] != f.album or \
                            check_exist_song['TrackTitle'] != f.title:
                        # Important track metadata has been modified, need to
                        # run matcher again
                        if f_artist and f_artist != check_exist_song['ArtistName']:
                            new_artists.append(f_artist)
                        elif f_artist and f_artist == check_exist_song['ArtistName'] and \
                                check_exist_song['Matched'] != "Ignored":
                            new_artists.append(f_artist)
                        else:
                            continue
                        # Reset the match state and any stale track locations
                        # so the matcher below reprocesses this file.
                        newValueDict['Matched'] = None
                        myDB.upsert("have", newValueDict, controlValueDict)
                        myDB.action(
                            'UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE Location=?',
                            [None, None, None, unicode_song_path])
                        myDB.action(
                            'UPDATE alltracks SET Location=?, BitRate=?, Format=? WHERE Location=?',
                            [None, None, None, unicode_song_path])
                        new_song_count += 1
                    else:
                        # This track information hasn't changed
                        if f_artist and check_exist_song['Matched'] != "Ignored":
                            new_artists.append(f_artist)

                file_count += 1

    # Now we start track matching
    logger.info("%s new/modified songs found and added to the database" % new_song_count)

    song_list = myDB.action(
        "SELECT * FROM have WHERE Matched IS NULL AND LOCATION LIKE ?",
        [dir.decode(headphones.SYS_ENCODING, 'replace') + "%"])
    total_number_of_songs = \
        myDB.action("SELECT COUNT(*) FROM have WHERE Matched IS NULL AND LOCATION LIKE ?",
                    [dir.decode(headphones.SYS_ENCODING, 'replace') + "%"]).fetchone()[0]

    logger.info("Found " + str(total_number_of_songs) + " new/modified tracks in: '" +
                dir.decode(headphones.SYS_ENCODING, 'replace') +
                "'. Matching tracks to the appropriate releases....")

    # Sort the song_list by most vague (e.g. no trackid or releaseid) to most
    # specific (both trackid & releaseid)
    # When we insert into the database, the tracks with the most specific
    # information will overwrite the more general matches
    # song_list = helpers.multikeysort(song_list, ['ReleaseID', 'TrackID'])
    song_list = helpers.multikeysort(song_list, ['ArtistName', 'AlbumTitle'])

    # We'll use this to give a % completion, just because the track matching
    # might take a while
    song_count = 0
    latest_artist = []
    last_completion_percentage = 0
    # Cache the artist lookup across consecutive songs by the same artist.
    prev_artist_name = None
    artistid = None

    for song in song_list:
        latest_artist.append(song['ArtistName'])
        if song_count == 0:
            logger.info("Now matching songs by %s" % song['ArtistName'])
        elif latest_artist[song_count] != latest_artist[song_count - 1] and song_count != 0:
            logger.info("Now matching songs by %s" % song['ArtistName'])

        song_count += 1
        completion_percentage = math.floor(
            float(song_count) / total_number_of_songs * 1000) / 10

        # Only report every additional 10% of progress.
        if completion_percentage >= (last_completion_percentage + 10):
            logger.info("Track matching is " + str(completion_percentage) + "% complete")
            last_completion_percentage = completion_percentage

        # THE "MORE-SPECIFIC" CLAUSES HERE HAVE ALL BEEN REMOVED. WHEN RUNNING
        # A LIBRARY SCAN, THE ONLY CLAUSES THAT EVER GOT HIT WERE
        # [ARTIST/ALBUM/TRACK] OR CLEANNAME. ARTISTID & RELEASEID ARE NEVER
        # PASSED TO THIS FUNCTION, ARE NEVER FOUND, AND THE OTHER CLAUSES WERE
        # NEVER HIT. FURTHERMORE, OTHER MATCHING FUNCTIONS IN THIS PROGRAM
        # (IMPORTER.PY, MB.PY) SIMPLY DO A [ARTIST/ALBUM/TRACK] OR CLEANNAME
        # MATCH, SO IT'S ALL CONSISTENT.
        albumid = None

        if song['ArtistName'] and song['CleanName']:
            artist_name = song['ArtistName']
            clean_name = song['CleanName']

            # Only update if artist is in the db
            if artist_name != prev_artist_name:
                prev_artist_name = artist_name
                artistid = None

                # Double embedded quotes so the string-built LIKE below stays
                # syntactically valid.
                artist_lookup = "\"" + artist_name.replace("\"", "\"\"") + "\""

                try:
                    dbartist = myDB.select('SELECT DISTINCT ArtistID, ArtistName FROM artists WHERE ArtistName LIKE ' + artist_lookup + '')
                except:
                    dbartist = None
                if not dbartist:
                    dbartist = myDB.select(
                        'SELECT DISTINCT ArtistID, ArtistName FROM tracks WHERE CleanName = ?',
                        [clean_name])
                    if not dbartist:
                        dbartist = myDB.select(
                            'SELECT DISTINCT ArtistID, ArtistName FROM alltracks WHERE CleanName = ?',
                            [clean_name])
                        if not dbartist:
                            clean_artist = helpers.clean_name(artist_name)
                            if clean_artist:
                                # Range query emulates a prefix match while
                                # still using the CleanName index ('{' is the
                                # character just past 'z' in ASCII).
                                dbartist = myDB.select(
                                    'SELECT DISTINCT ArtistID, ArtistName FROM tracks WHERE CleanName >= ? and CleanName < ?',
                                    [clean_artist, clean_artist + '{'])
                                if not dbartist:
                                    dbartist = myDB.select(
                                        'SELECT DISTINCT ArtistID, ArtistName FROM alltracks WHERE CleanName >= ? and CleanName < ?',
                                        [clean_artist, clean_artist + '{'])

                if dbartist:
                    artistid = dbartist[0][0]

            if artistid:
                # This was previously using Artist, Album, Title with a
                # SELECT LIKE ? and was not using an index (Possible issue:
                # https://stackoverflow.com/questions/37845854/python-sqlite3-not-using-index-with-like)
                # Now selects/updates using CleanName index (may have to
                # revert if not working)
                # matching on CleanName should be enough, ensure it's the
                # same artist just in case

                # Update tracks
                track = myDB.action(
                    'SELECT AlbumID, ArtistName FROM tracks WHERE CleanName = ? AND ArtistID = ?',
                    [clean_name, artistid]).fetchone()
                if track:
                    albumid = track['AlbumID']
                    myDB.action(
                        'UPDATE tracks SET Location = ?, BitRate = ?, Format = ? WHERE CleanName = ? AND ArtistID = ?',
                        [song['Location'], song['BitRate'], song['Format'], clean_name, artistid])

                # Update alltracks
                alltrack = myDB.action(
                    'SELECT AlbumID, ArtistName FROM alltracks WHERE CleanName = ? AND ArtistID = ?',
                    [clean_name, artistid]).fetchone()
                if alltrack:
                    albumid = alltrack['AlbumID']
                    myDB.action(
                        'UPDATE alltracks SET Location = ?, BitRate = ?, Format = ? WHERE CleanName = ? AND ArtistID = ?',
                        [song['Location'], song['BitRate'], song['Format'], clean_name, artistid])

        # Update have
        controlValueDict2 = {'Location': song['Location']}
        if albumid:
            newValueDict2 = {'Matched': albumid}
        else:
            newValueDict2 = {'Matched': "Failed"}
        myDB.upsert("have", newValueDict2, controlValueDict2)

    logger.info('Completed matching tracks from directory: %s' %
                dir.decode(headphones.SYS_ENCODING, 'replace'))

    if not append or artistScan:
        logger.info('Updating scanned artist track counts')

        # Clean up the new artist list
        unique_artists = {}.fromkeys(new_artists).keys()

        # (A commented-out block that pre-partitioned scanned artists against
        # the artists table used to live here; the db is now checked
        # per-artist in the loop below instead.)

        new_artist_list = []
        for artist in unique_artists:
            if not artist:
                continue
            logger.info('Processing artist: %s' % artist)

            # check if artist is already in the db
            artist_lookup = "\"" + artist.replace("\"", "\"\"") + "\""
            try:
                dbartist = myDB.select('SELECT DISTINCT ArtistID, ArtistName FROM artists WHERE ArtistName LIKE ' + artist_lookup + '')
            except:
                dbartist = None
            if not dbartist:
                clean_artist = helpers.clean_name(artist)
                if clean_artist:
                    dbartist = myDB.select(
                        'SELECT DISTINCT ArtistID, ArtistName FROM tracks WHERE CleanName >= ? and CleanName < ?',
                        [clean_artist, clean_artist + '{'])
                    if not dbartist:
                        dbartist = myDB.select(
                            'SELECT DISTINCT ArtistID, ArtistName FROM alltracks WHERE CleanName >= ? and CleanName < ?',
                            [clean_artist, clean_artist + '{'])

            # new artist not in db, add to list
            if not dbartist:
                new_artist_list.append(artist)
            else:
                # artist in db, update have track counts
                artistid = dbartist[0][0]

                # Have tracks are selected from tracks table and not all
                # tracks because of duplicates
                # We update the track count upon an album switch to
                # compliment this
                try:
                    havetracks = (len(
                        myDB.select(
                            'SELECT ArtistID From tracks WHERE ArtistID = ? AND Location IS NOT NULL',
                            [artistid])
                    ) + len(
                        myDB.select(
                            'SELECT ArtistName FROM have WHERE ArtistName LIKE ' + artist_lookup + ' AND Matched = "Failed"')))
                except Exception as e:
                    logger.warn('Error updating counts for artist: %s: %s' % (artist, e))
                    # NOTE(review): when this query fails, 'havetracks' keeps
                    # its value from the previous loop iteration (or is
                    # unbound on the first) — confirm this is intended.

                # Note: some people complain about having "artist have
                # tracks" > # of tracks total in artist official releases
                # (can fix by getting rid of second len statement)
                if havetracks:
                    myDB.action(
                        'UPDATE artists SET HaveTracks = ? WHERE ArtistID = ?',
                        [havetracks, artistid])

                    # Update albums to downloaded
                    update_album_status(ArtistID=artistid)

        logger.info('Found %i new artists' % len(new_artist_list))

        # Add scanned artists not in the db
        if new_artist_list:
            if headphones.CONFIG.AUTO_ADD_ARTISTS:
                logger.info('Importing %i new artists' % len(new_artist_list))
                importer.artistlist_to_mbids(new_artist_list)
            else:
                logger.info(
                    'To add these artists, go to Manage->Manage New Artists')
                # myDB.action('DELETE from newartists')
                for artist in new_artist_list:
                    myDB.action('INSERT OR IGNORE INTO newartists VALUES (?)', [artist])

        if headphones.CONFIG.DETECT_BITRATE and bitrates:
            headphones.CONFIG.PREFERRED_BITRATE = sum(bitrates) / len(bitrates) / 1000

    else:
        # If we're appending a new album to the database, update the artists
        # total track counts
        logger.info('Updating artist track counts')

        artist_lookup = "\"" + ArtistName.replace("\"", "\"\"") + "\""
        try:
            havetracks = len(
                myDB.select(
                    'SELECT ArtistID FROM tracks WHERE ArtistID = ? AND Location IS NOT NULL',
                    [ArtistID])
            ) + len(
                myDB.select(
                    'SELECT ArtistName FROM have WHERE ArtistName LIKE ' + artist_lookup + ' AND Matched = "Failed"'))
        except Exception as e:
            logger.warn('Error updating counts for artist: %s: %s' % (ArtistName, e))
            # NOTE(review): 'havetracks' is unbound here when the query
            # fails, so the next line would raise NameError — confirm.

        if havetracks:
            myDB.action('UPDATE artists SET HaveTracks=? WHERE ArtistID=?',
                        [havetracks, ArtistID])

    # Moved above to call for each artist
    # if not append:
    #     update_album_status()

    if not append and not artistScan:
        lastfm.getSimilar()

    if ArtistName:
        logger.info('Scanning complete for artist: %s', ArtistName)
    else:
        logger.info('Library scan complete')
def getHybridRelease(fullreleaselist):
    """
    Return the best group of tracks from the list of releases together with
    the earliest release date.

    fullreleaselist -- list of release dicts carrying 'AlbumASIN', 'Tracks',
                       'ReleaseID', 'ReleaseDate', 'Format' and 'Country'.

    Returns a dict with keys 'ReleaseDate' (earliest date across all
    releases), 'Tracks' and 'AlbumASIN' (both from the preferred release).

    Raises Exception when called with an empty list.
    """
    # Guard added for consistency with the other getHybridRelease variants:
    # without it an empty list crashed later with a ZeroDivisionError when
    # computing the average track count.
    if not fullreleaselist:
        raise Exception("getHybridRelease was called with an empty fullreleaselist")

    # Preference ranks: lower sorts first. Hoisted out of the loop since they
    # are constant for every release; unknown values fall back to 3 (last).
    formats = {'2xVinyl': 2, 'Vinyl': 2, 'CD': 0, 'Cassette': 3,
               '2xCD': 1, 'Digital Media': 0}
    countries = {'US': 0, 'GB': 1, 'JP': 2}

    sortable_release_list = []

    for release in fullreleaselist:
        # Narrowed from a bare except: only a missing/unknown key should
        # fall back to the default rank.
        try:
            format = formats[release['Format']]
        except KeyError:
            format = 3
        try:
            country = countries[release['Country']]
        except KeyError:
            country = 3

        release_dict = {
            'hasasin': bool(release['AlbumASIN']),
            'asin': release['AlbumASIN'],
            'trackscount': len(release['Tracks']),
            'releaseid': release['ReleaseID'],
            'releasedate': release['ReleaseDate'],
            'format': format,
            'country': country,
            'tracks': release['Tracks']
        }
        sortable_release_list.append(release_dict)

    # necessary to make dates that miss the month and/or day show up after full dates
    def getSortableReleaseDate(releaseDate):
        if releaseDate is None:
            # change this value to change the sorting behaviour of None:
            # returning 'None' puts it at the top, which was the normal
            # behaviour for pre-ngs versions
            return 'None'
        if releaseDate.count('-') == 2:
            # full YYYY-MM-DD date
            return releaseDate
        if releaseDate.count('-') == 1:
            # YYYY-MM only: pad with an impossible day so it sorts last
            return releaseDate + '32'
        # YYYY only: pad with an impossible month and day
        return releaseDate + '13-32'

    sortable_release_list.sort(key=lambda x: getSortableReleaseDate(x['releasedate']))

    # Prefer the release whose track count is closest to the average.
    average_tracks = sum(x['trackscount'] for x in sortable_release_list) / float(len(sortable_release_list))
    for item in sortable_release_list:
        item['trackscount_delta'] = abs(average_tracks - item['trackscount'])

    a = helpers.multikeysort(sortable_release_list, ['-hasasin', 'country', 'format', 'trackscount_delta'])

    # Earliest date comes from the date-sorted list; tracks/ASIN come from
    # the best-ranked release.
    release_dict = {'ReleaseDate': sortable_release_list[0]['releasedate'],
                    'Tracks': a[0]['tracks'],
                    'AlbumASIN': a[0]['asin']
                    }

    return release_dict
def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None):
    """Scan a music directory and match files to tracks in the database.

    Legacy scanner: walks ``dir`` (falls back to headphones.MUSIC_DIR),
    reads tags from every media file, then tries a cascade of increasingly
    vague matching clauses (TrackID+ReleaseID down to CleanName) against the
    ``tracks``/``alltracks`` tables, inserting anything unmatched into the
    ``have`` table. Finally updates per-artist track counts and queues
    artists that aren't in the db yet.

    dir        -- directory to scan; encoded to a bytestring below unless it
                  comes from the post processor (``append``)
    append     -- True when called from the post processor for a new album
    ArtistID   -- artist to update counts for when appending
    ArtistName -- artist name used for the ``have`` count when appending

    Returns None; all results are written to the database.
    """
    if not dir:
        dir = headphones.MUSIC_DIR

    # If we're appending a dir, it's coming from the post processor which is
    # already bytestring
    if not append:
        dir = dir.encode(headphones.SYS_ENCODING)

    if not os.path.isdir(dir):
        logger.warn('Cannot find directory: %s. Not scanning' %
                    dir.decode(headphones.SYS_ENCODING))
        return

    myDB = db.DBConnection()

    if not append:
        # Clean up bad filepaths
        tracks = myDB.select('SELECT Location, TrackID from tracks WHERE Location IS NOT NULL')

        for track in tracks:
            # File gone from disk: clear its location/quality columns.
            if not os.path.isfile(track['Location'].encode(headphones.SYS_ENCODING)):
                myDB.action('UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE TrackID=?',
                            [None, None, None, track['TrackID']])

        # Full scans rebuild the 'have' table from scratch.
        myDB.action('DELETE from have')

    logger.info('Scanning music directory: %s' % dir)

    new_artists = []
    bitrates = []

    song_list = []

    for r, d, f in os.walk(dir):
        for files in f:
            # MEDIA_FORMATS = music file extensions, e.g. mp3, flac, etc
            if any(files.lower().endswith('.' + x.lower())
                   for x in headphones.MEDIA_FORMATS):

                song = os.path.join(r, files)

                # We need the unicode path to use for logging, inserting into
                # database
                unicode_song_path = song.decode(headphones.SYS_ENCODING, 'replace')

                # Try to read the metadata
                # NOTE(review): rebinds the walk loop variable 'f'; the inner
                # 'for files in f' iterator is unaffected.
                try:
                    f = MediaFile(song)
                except:
                    logger.error('Cannot read file: ' + unicode_song_path)
                    continue

                # Grab the bitrates for the auto detect bit rate option
                if f.bitrate:
                    bitrates.append(f.bitrate)

                # Use the album artist over the artist if available
                if f.albumartist:
                    f_artist = f.albumartist
                elif f.artist:
                    f_artist = f.artist
                else:
                    f_artist = None

                # Add the song to our song list -
                # TODO: skip adding songs without the minimum requisite
                # information (just a matter of putting together the right if
                # statements)
                song_dict = {'TrackID': f.mb_trackid,
                             'ReleaseID': f.mb_albumid,
                             'ArtistName': f_artist,
                             'AlbumTitle': f.album,
                             'TrackNumber': f.track,
                             'TrackLength': f.length,
                             'Genre': f.genre,
                             'Date': f.date,
                             'TrackTitle': f.title,
                             'BitRate': f.bitrate,
                             'Format': f.format,
                             'Location': unicode_song_path}

                song_list.append(song_dict)

    # Now we start track matching
    total_number_of_songs = len(song_list)
    logger.info("Found " + str(total_number_of_songs) + " tracks in: '" + dir +
                "'. Matching tracks to the appropriate releases....")

    # Sort the song_list by most vague (e.g. no trackid or releaseid) to most
    # specific (both trackid & releaseid)
    # When we insert into the database, the tracks with the most specific
    # information will overwrite the more general matches
    song_list = helpers.multikeysort(song_list, ['ReleaseID', 'TrackID'])

    # We'll use this to give a % completion, just because the track matching
    # might take a while
    song_count = 0

    for song in song_list:

        song_count += 1
        completion_percentage = float(song_count) / total_number_of_songs * 100

        # Only logs at exact multiples of 10%.
        if completion_percentage % 10 == 0:
            logger.info("Track matching is " + str(completion_percentage) + "% complete")

        # If the track has a trackid & releaseid (beets: albumid) that the
        # most surefire way of identifying a track to a specific release so
        # we'll use that first
        if song['TrackID'] and song['ReleaseID']:

            # Check both the tracks table & alltracks table in case they
            # haven't populated the alltracks table yet
            track = myDB.action('SELECT TrackID, ReleaseID, AlbumID from alltracks WHERE TrackID=? AND ReleaseID=?',
                                [song['TrackID'], song['ReleaseID']]).fetchone()

            # It might be the case that the alltracks table isn't populated
            # yet, so maybe we can only find a match in the tracks table
            if not track:
                track = myDB.action('SELECT TrackID, ReleaseID, AlbumID from tracks WHERE TrackID=? AND ReleaseID=?',
                                    [song['TrackID'], song['ReleaseID']]).fetchone()

            if track:
                # Use TrackID & ReleaseID here since there can only be one
                # possible match with a TrackID & ReleaseID query combo
                controlValueDict = {'TrackID': track['TrackID'],
                                    'ReleaseID': track['ReleaseID']}

                # Insert it into the Headphones hybrid release
                # (ReleaseID == AlbumID)
                hybridControlValueDict = {'TrackID': track['TrackID'],
                                          'ReleaseID': track['AlbumID']}

                newValueDict = {'Location': song['Location'],
                                'BitRate': song['BitRate'],
                                'Format': song['Format']}

                # Update both the tracks table and the alltracks table using
                # the controlValueDict and hybridControlValueDict
                myDB.upsert("alltracks", newValueDict, controlValueDict)
                myDB.upsert("tracks", newValueDict, controlValueDict)
                myDB.upsert("alltracks", newValueDict, hybridControlValueDict)
                myDB.upsert("tracks", newValueDict, hybridControlValueDict)

                # Matched. Move on to the next one:
                continue

        # If we can't find it with TrackID & ReleaseID, next most specific
        # will be releaseid + tracktitle, although perhaps less reliable due
        # to a higher likelihood of variations in the song title
        # (e.g. feat. artists)
        if song['ReleaseID'] and song['TrackTitle']:

            track = myDB.action('SELECT TrackID, ReleaseID, AlbumID from alltracks WHERE ReleaseID=? AND TrackTitle=?',
                                [song['ReleaseID'], song['TrackTitle']]).fetchone()

            if not track:
                track = myDB.action('SELECT TrackID, ReleaseID, AlbumID from tracks WHERE ReleaseID=? AND TrackTitle=?',
                                    [song['ReleaseID'], song['TrackTitle']]).fetchone()

            if track:
                # There can also only be one match for this query as well
                # (although it might be on both the tracks and alltracks
                # table) So use both TrackID & ReleaseID as the control values
                controlValueDict = {'TrackID': track['TrackID'],
                                    'ReleaseID': track['ReleaseID']}

                hybridControlValueDict = {'TrackID': track['TrackID'],
                                          'ReleaseID': track['AlbumID']}

                newValueDict = {'Location': song['Location'],
                                'BitRate': song['BitRate'],
                                'Format': song['Format']}

                # Update both tables here as well
                myDB.upsert("alltracks", newValueDict, controlValueDict)
                myDB.upsert("tracks", newValueDict, controlValueDict)
                myDB.upsert("alltracks", newValueDict, hybridControlValueDict)
                myDB.upsert("tracks", newValueDict, hybridControlValueDict)

                # Done
                continue

        # Next most specific will be the opposite: a TrackID and an
        # AlbumTitle TrackIDs span multiple releases so if something is on an
        # official album and a compilation, for example, this will match it
        # to the right one However - there may be multiple matches here
        if song['TrackID'] and song['AlbumTitle']:

            # Even though there might be multiple matches, we just need to
            # grab one to confirm a match
            track = myDB.action('SELECT TrackID, AlbumTitle from alltracks WHERE TrackID=? AND AlbumTitle LIKE ?',
                                [song['TrackID'], song['AlbumTitle']]).fetchone()

            if not track:
                track = myDB.action('SELECT TrackID, AlbumTitle from tracks WHERE TrackID=? AND AlbumTitle LIKE ?',
                                    [song['TrackID'], song['AlbumTitle']]).fetchone()

            if track:
                # Don't need the hybridControlValueDict here since ReleaseID
                # is not unique
                controlValueDict = {'TrackID': track['TrackID'],
                                    'AlbumTitle': track['AlbumTitle']}

                newValueDict = {'Location': song['Location'],
                                'BitRate': song['BitRate'],
                                'Format': song['Format']}

                myDB.upsert("alltracks", newValueDict, controlValueDict)
                myDB.upsert("tracks", newValueDict, controlValueDict)

                continue

        # Next most specific is the ArtistName + AlbumTitle + TrackTitle
        # combo (but probably even more unreliable than the previous queries,
        # and might span multiple releases)
        if song['ArtistName'] and song['AlbumTitle'] and song['TrackTitle']:

            track = myDB.action('SELECT ArtistName, AlbumTitle, TrackTitle from alltracks WHERE ArtistName LIKE ? AND AlbumTitle LIKE ? AND TrackTitle LIKE ?',
                                [song['ArtistName'], song['AlbumTitle'], song['TrackTitle']]).fetchone()

            if not track:
                track = myDB.action('SELECT ArtistName, AlbumTitle, TrackTitle from tracks WHERE ArtistName LIKE ? AND AlbumTitle LIKE ? AND TrackTitle LIKE ?',
                                    [song['ArtistName'], song['AlbumTitle'], song['TrackTitle']]).fetchone()

            if track:
                controlValueDict = {'ArtistName': track['ArtistName'],
                                    'AlbumTitle': track['AlbumTitle'],
                                    'TrackTitle': track['TrackTitle']}

                newValueDict = {'Location': song['Location'],
                                'BitRate': song['BitRate'],
                                'Format': song['Format']}

                myDB.upsert("alltracks", newValueDict, controlValueDict)
                myDB.upsert("tracks", newValueDict, controlValueDict)

                continue

        # Use the "CleanName" (ArtistName + AlbumTitle + TrackTitle stripped
        # of punctuation, capitalization, etc) This is more reliable than the
        # former but requires some string manipulation so we'll do it only if
        # we can't find a match with the original data
        if song['ArtistName'] and song['AlbumTitle'] and song['TrackTitle']:

            CleanName = helpers.cleanName(song['ArtistName'] + ' ' + song['AlbumTitle'] + ' ' + song['TrackTitle'])

            track = myDB.action('SELECT CleanName from alltracks WHERE CleanName LIKE ?', [CleanName]).fetchone()

            if not track:
                track = myDB.action('SELECT CleanName from tracks WHERE CleanName LIKE ?', [CleanName]).fetchone()

            if track:
                controlValueDict = {'CleanName': track['CleanName']}

                newValueDict = {'Location': song['Location'],
                                'BitRate': song['BitRate'],
                                'Format': song['Format']}

                myDB.upsert("alltracks", newValueDict, controlValueDict)
                myDB.upsert("tracks", newValueDict, controlValueDict)

                continue

        # Match on TrackID alone if we can't find it using any of the above
        # methods. This method is reliable but spans multiple releases - but
        # that's why we're putting at the beginning as a last resort. If a
        # track with more specific information exists in the library, it'll
        # overwrite these values
        if song['TrackID']:

            track = myDB.action('SELECT TrackID from alltracks WHERE TrackID=?', [song['TrackID']]).fetchone()

            if not track:
                track = myDB.action('SELECT TrackID from tracks WHERE TrackID=?', [song['TrackID']]).fetchone()

            if track:
                controlValueDict = {'TrackID': track['TrackID']}

                newValueDict = {'Location': song['Location'],
                                'BitRate': song['BitRate'],
                                'Format': song['Format']}

                myDB.upsert("alltracks", newValueDict, controlValueDict)
                myDB.upsert("tracks", newValueDict, controlValueDict)

                continue

        # if we can't find a match in the database on a track level, it might
        # be a new artist or it might be on a non-mb release
        if song['ArtistName']:
            new_artists.append(song['ArtistName'])
        else:
            continue

        # The have table will become the new database for unmatched tracks
        # (i.e. tracks with no associated links in the database
        if song['ArtistName'] and song['AlbumTitle'] and song['TrackTitle']:
            CleanName = helpers.cleanName(song['ArtistName'] + ' ' + song['AlbumTitle'] + ' ' + song['TrackTitle'])
        else:
            continue

        myDB.action('INSERT INTO have (ArtistName, AlbumTitle, TrackNumber, TrackTitle, TrackLength, BitRate, Genre, Date, TrackID, Location, CleanName, Format) VALUES( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                    [song['ArtistName'], song['AlbumTitle'], song['TrackNumber'], song['TrackTitle'], song['TrackLength'], song['BitRate'], song['Genre'], song['Date'], song['TrackID'], song['Location'], CleanName, song['Format']])

    logger.info('Completed matching tracks from directory: %s' % dir)

    if not append:

        # Clean up the new artist list
        unique_artists = {}.fromkeys(new_artists).keys()
        current_artists = myDB.select('SELECT ArtistName, ArtistID from artists')

        # Scanned artists that aren't in the db yet.
        artist_list = [f for f in unique_artists
                       if f.lower() not in [x[0].lower() for x in current_artists]]

        # Update track counts
        logger.info('Updating current artist track counts')

        for artist in current_artists:
            # Have tracks are selected from tracks table and not all tracks
            # because of duplicates
            # We update the track count upon an album switch to compliment
            # this
            havetracks = len(
                myDB.select('SELECT TrackTitle from tracks WHERE ArtistID=? AND Location IS NOT NULL',
                            [artist['ArtistID']])) + len(
                myDB.select('SELECT TrackTitle from have WHERE ArtistName like ?',
                            [artist['ArtistName']]))
            myDB.action('UPDATE artists SET HaveTracks=? WHERE ArtistID=?',
                        [havetracks, artist['ArtistID']])

        logger.info('Found %i new artists' % len(artist_list))

        if len(artist_list):
            if headphones.ADD_ARTISTS:
                logger.info('Importing %i new artists' % len(artist_list))
                importer.artistlist_to_mbids(artist_list)
            else:
                logger.info('To add these artists, go to Manage->Manage New Artists')
                myDB.action('DELETE from newartists')
                for artist in artist_list:
                    myDB.action('INSERT into newartists VALUES (?)', [artist])

        # NOTE(review): raises ZeroDivisionError when no media files carried
        # a bitrate (empty 'bitrates') — the newer scanner guards this with
        # 'and bitrates'; confirm.
        if headphones.DETECT_BITRATE:
            headphones.PREFERRED_BITRATE = sum(bitrates) / len(bitrates) / 1000

    else:

        # If we're appending a new album to the database, update the artists
        # total track counts
        logger.info('Updating artist track counts')

        havetracks = len(
            myDB.select('SELECT TrackTitle from tracks WHERE ArtistID=? AND Location IS NOT NULL',
                        [ArtistID])) + len(
            myDB.select('SELECT TrackTitle from have WHERE ArtistName like ?',
                        [ArtistName]))
        myDB.action('UPDATE artists SET HaveTracks=? WHERE ArtistID=?',
                    [havetracks, ArtistID])
def getHybridRelease(fullreleaselist):
    """
    Pick the best group of tracks from a list of releases in one release group.

    Each release dict must provide the keys 'AlbumASIN', 'Tracks',
    'ReleaseID', 'ReleaseDate', 'Format' and 'Country'.

    Returns a dict with:
        'ReleaseDate' -- the earliest release date found in the group
        'Tracks'      -- the track list of the highest-ranked release
        'AlbumASIN'   -- the ASIN of the highest-ranked release

    Raises:
        Exception: if fullreleaselist is empty.
    """
    if len(fullreleaselist) == 0:
        raise Exception("getHybridRelease was called with an empty fullreleaselist")

    # Preference ranking, lower is better. Unknown formats/countries rank
    # last (3). Hoisted out of the loop since they are constants.
    format_ranks = {'2xVinyl': 2, 'Vinyl': 2, 'CD': 0, 'Cassette': 3, '2xCD': 1, 'Digital Media': 0}
    country_ranks = {'US': 0, 'GB': 1, 'JP': 2}

    sortable_release_list = []

    for release in fullreleaselist:
        release_dict = {
            'hasasin': bool(release['AlbumASIN']),
            'asin': release['AlbumASIN'],
            'trackscount': len(release['Tracks']),
            'releaseid': release['ReleaseID'],
            'releasedate': release['ReleaseDate'],
            # .get() replaces the old bare except: unknown or missing
            # format/country simply falls back to the worst rank.
            'format': format_ranks.get(release.get('Format'), 3),
            'country': country_ranks.get(release.get('Country'), 3),
            'tracks': release['Tracks']
        }
        sortable_release_list.append(release_dict)

    # Necessary to make dates that miss the month and/or day show up after
    # full dates of the same year: pad with an impossible month/day.
    # A missing date sorts as the string 'None', which compares greater than
    # digit-prefixed date strings, so dateless releases end up at the bottom.
    def getSortableReleaseDate(releaseDate):
        if releaseDate is None:
            return 'None'
        if releaseDate.count('-') == 2:      # full YYYY-MM-DD
            return releaseDate
        elif releaseDate.count('-') == 1:    # YYYY-MM: pad day
            return releaseDate + '32'
        else:                                # YYYY only: pad month and day
            return releaseDate + '13-32'

    sortable_release_list.sort(key=lambda x: getSortableReleaseDate(x['releasedate']))

    # Prefer releases whose track count is closest to the group average.
    average_tracks = sum(x['trackscount'] for x in sortable_release_list) / float(len(sortable_release_list))
    for item in sortable_release_list:
        item['trackscount_delta'] = abs(average_tracks - item['trackscount'])

    # Equivalent to helpers.multikeysort(..., ['-hasasin', 'country',
    # 'format', 'trackscount_delta']): releases with an ASIN first
    # (descending bool == ascending "not hasasin"), then preferred country,
    # format, and closest-to-average track count. Python's sort is stable,
    # so ties keep the earliest-date ordering established above.
    best = sorted(sortable_release_list,
                  key=lambda x: (not x['hasasin'], x['country'], x['format'], x['trackscount_delta']))[0]

    return {
        'ReleaseDate': sortable_release_list[0]['releasedate'],  # earliest date in the group
        'Tracks': best['tracks'],
        'AlbumASIN': best['asin']
    }
time.sleep(1) if not releaseResult: continue time.sleep(1) release_dict = { 'asin': bool(releaseResult.asin), 'tracks': len(releaseResult.getTracks()), 'releaseid': u.extractUuid(releaseResult.id) } releaselist.append(release_dict) a = multikeysort(releaselist, ['-asin', '-tracks']) releaseid = a[0]['releaseid'] return releaseid def getRelease(releaseid): """ Deep release search to get track info """ with mb_lock: release = {} inc = ws.ReleaseIncludes(tracks=True, releaseEvents=True)
def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None, cron=False):
    """
    Scan a music directory, register its files in the 'have' table and match
    them against known tracks ('tracks'/'alltracks') in the database.

    dir        -- directory to scan; defaults to headphones.MUSIC_DIR.
    append     -- True when a single new album dir is being appended (called
                  from the post processor, path already a bytestring).
    ArtistID   -- used with append=True to update that artist's track count.
    ArtistName -- used with append=True to update that artist's track count.
    cron       -- when True, abort unless the LIBRARYSCAN setting is enabled.

    Side effects: updates the have/tracks/alltracks/artists/newartists tables,
    may kick off importer.artistlist_to_mbids(), and may change
    headphones.PREFERRED_BITRATE. Returns None.
    """
    if cron and not headphones.LIBRARYSCAN:
        return

    if not dir:
        if not headphones.MUSIC_DIR:
            return
        else:
            dir = headphones.MUSIC_DIR

    # If we're appending a dir, it's coming from the post processor which is
    # already bytestring
    if not append:
        dir = dir.encode(headphones.SYS_ENCODING)

    if not os.path.isdir(dir):
        logger.warn('Cannot find directory: %s. Not scanning' % dir.decode(headphones.SYS_ENCODING, 'replace'))
        return

    myDB = db.DBConnection()

    if not append:
        # Clean up bad filepaths: any track whose file is gone from disk has
        # its location/bitrate/format nulled out rather than being deleted.
        tracks = myDB.select('SELECT Location, TrackID from alltracks WHERE Location IS NOT NULL')

        for track in tracks:
            encoded_track_string = track['Location'].encode(headphones.SYS_ENCODING)
            if not os.path.isfile(encoded_track_string):
                myDB.action('UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE Location=?', [None, None, None, track['Location']])
                myDB.action('UPDATE alltracks SET Location=?, BitRate=?, Format=? WHERE Location=?', [None, None, None, track['Location']])

        del_have_tracks = myDB.select('SELECT Location, Matched from have')

        for track in del_have_tracks:
            encoded_track_string = track['Location'].encode(headphones.SYS_ENCODING)
            if not os.path.isfile(encoded_track_string):
                myDB.action('DELETE FROM have WHERE Location=?', [track['Location']])
                # NOTE(review): this clears Matched for every row sharing this
                # AlbumID, not just the deleted file — presumably to force a
                # re-match of the whole album; confirm before changing.
                myDB.action('UPDATE have SET Matched=NULL WHERE Matched=?', [track['Matched']])
                logger.info('File %s removed from Headphones, as it is no longer on disk' % encoded_track_string.decode(headphones.SYS_ENCODING, 'replace'))

        ###############myDB.action('DELETE from have')

    logger.info('Scanning music directory: %s' % dir.decode(headphones.SYS_ENCODING, 'replace'))

    new_artists = []
    bitrates = []

    song_list = []
    new_song_count = 0

    for r, d, f in os.walk(dir):
        # need to abuse slicing to get a copy of the list, doing it directly
        # will skip the element after a deleted one
        # using a list comprehension will not work correctly for nested
        # subdirectories (os.walk keeps its original list)
        for directory in d[:]:
            if directory.startswith("."):
                d.remove(directory)
        for files in f:
            # MEDIA_FORMATS = music file extensions, e.g. mp3, flac, etc
            if any(files.lower().endswith('.' + x.lower()) for x in headphones.MEDIA_FORMATS):
                song = os.path.join(r, files)
                # We need the unicode path to use for logging, inserting into database
                unicode_song_path = song.decode(headphones.SYS_ENCODING, 'replace')
                # Try to read the metadata
                try:
                    f = MediaFile(song)
                except:
                    logger.error('Cannot read file: ' + unicode_song_path)
                    continue

                # Grab the bitrates for the auto detect bit rate option
                if f.bitrate:
                    bitrates.append(f.bitrate)

                # Use the album artist over the artist if available
                if f.albumartist:
                    f_artist = f.albumartist
                elif f.artist:
                    f_artist = f.artist
                else:
                    f_artist = None

                # Add the song to our song list -
                # TODO: skip adding songs without the minimum requisite information (just a matter of putting together the right if statements)
                if f_artist and f.album and f.title:
                    CleanName = helpers.cleanName(f_artist + ' ' + f.album + ' ' + f.title)
                else:
                    CleanName = None

                controlValueDict = {'Location': unicode_song_path}
                newValueDict = {'TrackID': f.mb_trackid,
                                #'ReleaseID' : f.mb_albumid,
                                'ArtistName': f_artist,
                                'AlbumTitle': f.album,
                                'TrackNumber': f.track,
                                'TrackLength': f.length,
                                'Genre': f.genre,
                                'Date': f.date,
                                'TrackTitle': f.title,
                                'BitRate': f.bitrate,
                                'Format': f.format,
                                'CleanName': CleanName
                                }

                #song_list.append(song_dict)
                check_exist_song = myDB.action("SELECT * FROM have WHERE Location=?", [unicode_song_path]).fetchone()
                # Only attempt to match songs that are new, haven't yet been matched, or metadata has changed.
                if not check_exist_song:
                    myDB.upsert("have", newValueDict, controlValueDict)
                    new_song_count += 1
                elif check_exist_song['ArtistName'] != f_artist or check_exist_song['AlbumTitle'] != f.album or check_exist_song['TrackTitle'] != f.title:
                    # Key metadata changed on disk: clear Matched so the
                    # matcher below picks this row up again.
                    newValueDict['Matched'] = None
                    myDB.upsert("have", newValueDict, controlValueDict)
                    new_song_count += 1

    # Now we start track matching
    logger.info("%s new/modified songs found and added to the database" % new_song_count)
    song_list = myDB.action("SELECT * FROM have WHERE Matched IS NULL AND LOCATION LIKE ?", [dir + "%"])
    total_number_of_songs = myDB.action("SELECT COUNT(*) FROM have WHERE Matched IS NULL AND LOCATION LIKE ?", [dir + "%"]).fetchone()[0]
    logger.info("Found " + str(total_number_of_songs) + " unmatched tracks in: '" + dir.decode(headphones.SYS_ENCODING, 'replace') + "'. Matching tracks to the appropriate releases....")

    # Sort the song_list by most vague (e.g. no trackid or releaseid) to most specific (both trackid & releaseid)
    # When we insert into the database, the tracks with the most specific information will overwrite the more general matches
    ##############song_list = helpers.multikeysort(song_list, ['ReleaseID', 'TrackID'])
    song_list = helpers.multikeysort(song_list, ['ArtistName', 'AlbumTitle'])

    # We'll use this to give a % completion, just because the track matching might take a while
    song_count = 0
    latest_artist = []

    for song in song_list:
        latest_artist.append(song['ArtistName'])
        if song_count == 0:
            logger.info("Now matching songs by %s" % song['ArtistName'])
        elif latest_artist[song_count] != latest_artist[song_count - 1] and song_count != 0:
            logger.info("Now matching songs by %s" % song['ArtistName'])

        #print song['ArtistName']+' - '+song['AlbumTitle']+' - '+song['TrackTitle']
        song_count += 1
        completion_percentage = float(song_count) / total_number_of_songs * 100

        if completion_percentage % 10 == 0:
            logger.info("Track matching is " + str(completion_percentage) + "% complete")

        # THE "MORE-SPECIFIC" CLAUSES HERE HAVE ALL BEEN REMOVED. WHEN RUNNING A LIBRARY SCAN, THE ONLY CLAUSES THAT
        # EVER GOT HIT WERE [ARTIST/ALBUM/TRACK] OR CLEANNAME. ARTISTID & RELEASEID ARE NEVER PASSED TO THIS FUNCTION,
        # ARE NEVER FOUND, AND THE OTHER CLAUSES WERE NEVER HIT. FURTHERMORE, OTHER MATCHING FUNCTIONS IN THIS PROGRAM
        # (IMPORTER.PY, MB.PY) SIMPLY DO A [ARTIST/ALBUM/TRACK] OR CLEANNAME MATCH, SO IT'S ALL CONSISTENT.
        if song['ArtistName'] and song['AlbumTitle'] and song['TrackTitle']:
            # First try an exact (LIKE) artist/album/title match in tracks,
            # then fall back to the normalized CleanName match.
            track = myDB.action('SELECT ArtistName, AlbumTitle, TrackTitle, AlbumID from tracks WHERE ArtistName LIKE ? AND AlbumTitle LIKE ? AND TrackTitle LIKE ?', [song['ArtistName'], song['AlbumTitle'], song['TrackTitle']]).fetchone()

            if track:
                controlValueDict = {'ArtistName': track['ArtistName'],
                                    'AlbumTitle': track['AlbumTitle'],
                                    'TrackTitle': track['TrackTitle']}
                newValueDict = {'Location': song['Location'],
                                'BitRate': song['BitRate'],
                                'Format': song['Format']}
                myDB.upsert("tracks", newValueDict, controlValueDict)
                controlValueDict2 = {'ArtistName': song['ArtistName'],
                                     'AlbumTitle': song['AlbumTitle'],
                                     'TrackTitle': song['TrackTitle']}
                newValueDict2 = {'Matched': track['AlbumID']}
                myDB.upsert("have", newValueDict2, controlValueDict2)
            else:
                track = myDB.action('SELECT CleanName, AlbumID from tracks WHERE CleanName LIKE ?', [song['CleanName']]).fetchone()
                if track:
                    controlValueDict = {'CleanName': track['CleanName']}
                    newValueDict = {'Location': song['Location'],
                                    'BitRate': song['BitRate'],
                                    'Format': song['Format']}
                    myDB.upsert("tracks", newValueDict, controlValueDict)
                    controlValueDict2 = {'CleanName': song['CleanName']}
                    newValueDict2 = {'Matched': track['AlbumID']}
                    myDB.upsert("have", newValueDict2, controlValueDict2)

            # Repeat the same two-step match against alltracks so duplicate
            # releases stay in sync with the matched file.
            alltrack = myDB.action('SELECT ArtistName, AlbumTitle, TrackTitle, AlbumID from alltracks WHERE ArtistName LIKE ? AND AlbumTitle LIKE ? AND TrackTitle LIKE ?', [song['ArtistName'], song['AlbumTitle'], song['TrackTitle']]).fetchone()

            if alltrack:
                controlValueDict = {'ArtistName': alltrack['ArtistName'],
                                    'AlbumTitle': alltrack['AlbumTitle'],
                                    'TrackTitle': alltrack['TrackTitle']}
                newValueDict = {'Location': song['Location'],
                                'BitRate': song['BitRate'],
                                'Format': song['Format']}
                myDB.upsert("alltracks", newValueDict, controlValueDict)
                controlValueDict2 = {'ArtistName': song['ArtistName'],
                                     'AlbumTitle': song['AlbumTitle'],
                                     'TrackTitle': song['TrackTitle']}
                newValueDict2 = {'Matched': alltrack['AlbumID']}
                myDB.upsert("have", newValueDict2, controlValueDict2)
            else:
                alltrack = myDB.action('SELECT CleanName, AlbumID from alltracks WHERE CleanName LIKE ?', [song['CleanName']]).fetchone()
                if alltrack:
                    controlValueDict = {'CleanName': alltrack['CleanName']}
                    newValueDict = {'Location': song['Location'],
                                    'BitRate': song['BitRate'],
                                    'Format': song['Format']}
                    myDB.upsert("alltracks", newValueDict, controlValueDict)
                    controlValueDict2 = {'CleanName': song['CleanName']}
                    newValueDict2 = {'Matched': alltrack['AlbumID']}
                    myDB.upsert("have", newValueDict2, controlValueDict2)

        # if we can't find a match in the database on a track level, it might be a new artist or it might be on a non-mb release
        if song['ArtistName']:
            new_artists.append(song['ArtistName'])
        else:
            continue

        #######myDB.action('INSERT INTO have (ArtistName, AlbumTitle, TrackNumber, TrackTitle, TrackLength, BitRate, Genre, Date, TrackID, Location, CleanName, Format) VALUES( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', [song['ArtistName'], song['AlbumTitle'], song['TrackNumber'], song['TrackTitle'], song['TrackLength'], song['BitRate'], song['Genre'], song['Date'], song['TrackID'], song['Location'], CleanName, song['Format']])

    logger.info('Completed matching tracks from directory: %s' % dir.decode(headphones.SYS_ENCODING, 'replace'))

    if not append:
        # Clean up the new artist list (de-duplicate preserving nothing else)
        unique_artists = {}.fromkeys(new_artists).keys()
        current_artists = myDB.select('SELECT ArtistName, ArtistID from artists')
        # There was a bug where artists with special characters (-,') would show up in new artists.
        artist_list = [f for f in unique_artists if helpers.cleanName(f).lower() not in [helpers.cleanName(x[0]).lower() for x in current_artists]]

        # Update track counts
        logger.info('Updating current artist track counts')

        for artist in current_artists:
            # Have tracks are selected from tracks table and not all tracks because of duplicates
            # We update the track count upon an album switch to compliment this
            havetracks = len(myDB.select('SELECT TrackTitle from tracks WHERE ArtistID=? AND Location IS NOT NULL', [artist['ArtistID']])) + len(myDB.select('SELECT TrackTitle from have WHERE ArtistName like ? AND Matched IS NULL', [artist['ArtistName']]))
            myDB.action('UPDATE artists SET HaveTracks=? WHERE ArtistID=?', [havetracks, artist['ArtistID']])

        logger.info('Found %i new artists' % len(artist_list))

        if len(artist_list):
            if headphones.ADD_ARTISTS:
                logger.info('Importing %i new artists' % len(artist_list))
                importer.artistlist_to_mbids(artist_list)
            else:
                logger.info('To add these artists, go to Manage->Manage New Artists')
                myDB.action('DELETE from newartists')
                for artist in artist_list:
                    myDB.action('INSERT into newartists VALUES (?)', [artist])

        if headphones.DETECT_BITRATE:
            # Average scanned bitrate, converted from bps to kbps.
            # NOTE(review): raises ZeroDivisionError if no bitrates were
            # collected during the scan — confirm callers guarantee files.
            headphones.PREFERRED_BITRATE = sum(bitrates) / len(bitrates) / 1000
    else:
        # If we're appending a new album to the database, update the artists total track counts
        logger.info('Updating artist track counts')

        havetracks = len(myDB.select('SELECT TrackTitle from tracks WHERE ArtistID=? AND Location IS NOT NULL', [ArtistID])) + len(myDB.select('SELECT TrackTitle from have WHERE ArtistName like ? AND Matched IS NULL', [ArtistName]))
        myDB.action('UPDATE artists SET HaveTracks=? WHERE ArtistID=?', [havetracks, ArtistID])

    update_album_status()
    logger.info('Library scan complete')
def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None, cron=False,
                artistScan=False):
    """
    Scan a music directory, register its files in the 'have' table and match
    them against known tracks ('tracks'/'alltracks') in the database.

    dir        -- directory to scan; defaults to headphones.CONFIG.MUSIC_DIR.
    append     -- True when a single new album dir is being appended (called
                  from the post processor, path already a bytestring).
    ArtistID   -- used with append=True to update that artist's track count.
    ArtistName -- used with append=True to update that artist's track count.
    cron       -- when True, abort unless the LIBRARYSCAN setting is enabled.
    artistScan -- True when rescanning a single artist's directory; skips the
                  global cleanup/last.fm steps but still updates track counts.

    Side effects: updates the have/tracks/alltracks/artists/newartists tables,
    may kick off importer.artistlist_to_mbids() and lastfm.getSimilar(), and
    may change headphones.CONFIG.PREFERRED_BITRATE. Returns None.
    """
    if cron and not headphones.CONFIG.LIBRARYSCAN:
        return

    if not dir:
        if not headphones.CONFIG.MUSIC_DIR:
            return
        else:
            dir = headphones.CONFIG.MUSIC_DIR

    # If we're appending a dir, it's coming from the post processor which is
    # already bytestring
    if not append or artistScan:
        dir = dir.encode(headphones.SYS_ENCODING)

    if not os.path.isdir(dir):
        logger.warn('Cannot find directory: %s. Not scanning' % dir.decode(headphones.SYS_ENCODING, 'replace'))
        return

    myDB = db.DBConnection()
    new_artists = []

    logger.info('Scanning music directory: %s' % dir.decode(headphones.SYS_ENCODING, 'replace'))

    if not append:
        # Clean up bad filepaths: tracks whose files vanished from disk get
        # their location/bitrate/format nulled out rather than being deleted.
        tracks = myDB.select(
            'SELECT Location from alltracks WHERE Location IS NOT NULL UNION SELECT Location from tracks WHERE Location IS NOT NULL')

        for track in tracks:
            encoded_track_string = track['Location'].encode(headphones.SYS_ENCODING, 'replace')
            if not os.path.isfile(encoded_track_string):
                myDB.action('UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE Location=?',
                            [None, None, None, track['Location']])
                myDB.action('UPDATE alltracks SET Location=?, BitRate=?, Format=? WHERE Location=?',
                            [None, None, None, track['Location']])

        del_have_tracks = myDB.select('SELECT Location, Matched, ArtistName from have')

        for track in del_have_tracks:
            encoded_track_string = track['Location'].encode(headphones.SYS_ENCODING, 'replace')
            if not os.path.isfile(encoded_track_string):
                if track['ArtistName']:
                    # Make sure deleted files get accounted for when updating artist track counts
                    new_artists.append(track['ArtistName'])
                myDB.action('DELETE FROM have WHERE Location=?', [track['Location']])
                logger.info(
                    'File %s removed from Headphones, as it is no longer on disk' % encoded_track_string.decode(
                        headphones.SYS_ENCODING, 'replace'))

    bitrates = []
    song_list = []
    latest_subdirectory = []

    new_song_count = 0
    file_count = 0

    for r, d, f in helpers.walk_directory(dir):
        # Filter paths based on config. Note that these methods work directly
        # on the inputs
        helpers.path_filter_patterns(d, headphones.CONFIG.IGNORED_FOLDERS, r)
        helpers.path_filter_patterns(f, headphones.CONFIG.IGNORED_FILES, r)

        for files in f:
            # MEDIA_FORMATS = music file extensions, e.g. mp3, flac, etc
            if any(files.lower().endswith('.' + x.lower()) for x in headphones.MEDIA_FORMATS):
                subdirectory = r.replace(dir, '')
                latest_subdirectory.append(subdirectory)

                # Log once per subdirectory change so big scans show progress.
                if file_count == 0 and r.replace(dir, '') != '':
                    logger.info("[%s] Now scanning subdirectory %s" % (
                        dir.decode(headphones.SYS_ENCODING, 'replace'),
                        subdirectory.decode(headphones.SYS_ENCODING, 'replace')))
                elif latest_subdirectory[file_count] != latest_subdirectory[
                        file_count - 1] and file_count != 0:
                    logger.info("[%s] Now scanning subdirectory %s" % (
                        dir.decode(headphones.SYS_ENCODING, 'replace'),
                        subdirectory.decode(headphones.SYS_ENCODING, 'replace')))

                song = os.path.join(r, files)

                # We need the unicode path to use for logging, inserting into database
                unicode_song_path = song.decode(headphones.SYS_ENCODING, 'replace')

                # Try to read the metadata
                try:
                    f = MediaFile(song)
                except (FileTypeError, UnreadableFileError):
                    logger.warning(
                        "Cannot read media file '%s', skipping. It may be corrupted or not a media file.",
                        unicode_song_path)
                    continue
                except IOError:
                    logger.warning("Cannnot read media file '%s', skipping. Does the file exists?",
                                   unicode_song_path)
                    continue

                # Grab the bitrates for the auto detect bit rate option
                if f.bitrate:
                    bitrates.append(f.bitrate)

                # Use the album artist over the artist if available
                if f.albumartist:
                    f_artist = f.albumartist
                elif f.artist:
                    f_artist = f.artist
                else:
                    f_artist = None

                # Add the song to our song list -
                # TODO: skip adding songs without the minimum requisite information (just a matter of putting together the right if statements)
                if f_artist and f.album and f.title:
                    CleanName = helpers.clean_name(f_artist + ' ' + f.album + ' ' + f.title)
                else:
                    CleanName = None

                controlValueDict = {'Location': unicode_song_path}
                newValueDict = {'TrackID': f.mb_trackid,
                                # 'ReleaseID' : f.mb_albumid,
                                'ArtistName': f_artist,
                                'AlbumTitle': f.album,
                                'TrackNumber': f.track,
                                'TrackLength': f.length,
                                'Genre': f.genre,
                                'Date': f.date,
                                'TrackTitle': f.title,
                                'BitRate': f.bitrate,
                                'Format': f.format,
                                'CleanName': CleanName
                                }

                # song_list.append(song_dict)
                check_exist_song = myDB.action("SELECT * FROM have WHERE Location=?",
                                               [unicode_song_path]).fetchone()
                # Only attempt to match songs that are new, haven't yet been matched, or metadata has changed.
                if not check_exist_song:
                    # This is a new track
                    if f_artist:
                        new_artists.append(f_artist)
                    myDB.upsert("have", newValueDict, controlValueDict)
                    new_song_count += 1
                else:
                    if check_exist_song['ArtistName'] != f_artist or check_exist_song[
                            'AlbumTitle'] != f.album or check_exist_song['TrackTitle'] != f.title:
                        # Important track metadata has been modified, need to run matcher again
                        if f_artist and f_artist != check_exist_song['ArtistName']:
                            new_artists.append(f_artist)
                        elif f_artist and f_artist == check_exist_song['ArtistName'] and \
                                check_exist_song['Matched'] != "Ignored":
                            new_artists.append(f_artist)
                        else:
                            continue
                        # Clear the old match and detach the stale file from
                        # tracks/alltracks before re-matching.
                        newValueDict['Matched'] = None
                        myDB.upsert("have", newValueDict, controlValueDict)
                        myDB.action(
                            'UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE Location=?',
                            [None, None, None, unicode_song_path])
                        myDB.action(
                            'UPDATE alltracks SET Location=?, BitRate=?, Format=? WHERE Location=?',
                            [None, None, None, unicode_song_path])
                        new_song_count += 1
                    else:
                        # This track information hasn't changed
                        if f_artist and check_exist_song['Matched'] != "Ignored":
                            new_artists.append(f_artist)

                file_count += 1

    # Now we start track matching
    logger.info("%s new/modified songs found and added to the database" % new_song_count)
    song_list = myDB.action("SELECT * FROM have WHERE Matched IS NULL AND LOCATION LIKE ?",
                            [dir.decode(headphones.SYS_ENCODING, 'replace') + "%"])
    total_number_of_songs = \
        myDB.action("SELECT COUNT(*) FROM have WHERE Matched IS NULL AND LOCATION LIKE ?",
                    [dir.decode(headphones.SYS_ENCODING, 'replace') + "%"]).fetchone()[0]
    logger.info("Found " + str(total_number_of_songs) + " new/modified tracks in: '" + dir.decode(
        headphones.SYS_ENCODING, 'replace') + "'. Matching tracks to the appropriate releases....")

    # Sort the song_list by most vague (e.g. no trackid or releaseid) to most specific (both trackid & releaseid)
    # When we insert into the database, the tracks with the most specific information will overwrite the more general matches
    # song_list = helpers.multikeysort(song_list, ['ReleaseID', 'TrackID'])
    song_list = helpers.multikeysort(song_list, ['ArtistName', 'AlbumTitle'])

    # We'll use this to give a % completion, just because the track matching might take a while
    song_count = 0
    latest_artist = []
    last_completion_percentage = 0

    for song in song_list:
        latest_artist.append(song['ArtistName'])
        if song_count == 0:
            logger.info("Now matching songs by %s" % song['ArtistName'])
        elif latest_artist[song_count] != latest_artist[song_count - 1] and song_count != 0:
            logger.info("Now matching songs by %s" % song['ArtistName'])

        song_count += 1
        # Percentage floored to one decimal; logged every +10% at most.
        completion_percentage = math.floor(float(song_count) / total_number_of_songs * 1000) / 10

        if completion_percentage >= (last_completion_percentage + 10):
            logger.info("Track matching is " + str(completion_percentage) + "% complete")
            last_completion_percentage = completion_percentage

        # THE "MORE-SPECIFIC" CLAUSES HERE HAVE ALL BEEN REMOVED. WHEN RUNNING A LIBRARY SCAN, THE ONLY CLAUSES THAT
        # EVER GOT HIT WERE [ARTIST/ALBUM/TRACK] OR CLEANNAME. ARTISTID & RELEASEID ARE NEVER PASSED TO THIS FUNCTION,
        # ARE NEVER FOUND, AND THE OTHER CLAUSES WERE NEVER HIT. FURTHERMORE, OTHER MATCHING FUNCTIONS IN THIS PROGRAM
        # (IMPORTER.PY, MB.PY) SIMPLY DO A [ARTIST/ALBUM/TRACK] OR CLEANNAME MATCH, SO IT'S ALL CONSISTENT.
        if song['ArtistName'] and song['AlbumTitle'] and song['TrackTitle']:
            # First an exact (LIKE) artist/album/title match in tracks, then
            # fall back to the normalized CleanName match; 'Failed' marks a
            # row the matcher gave up on so it is not retried every scan.
            track = myDB.action(
                'SELECT ArtistName, AlbumTitle, TrackTitle, AlbumID from tracks WHERE ArtistName LIKE ? AND AlbumTitle LIKE ? AND TrackTitle LIKE ?',
                [song['ArtistName'], song['AlbumTitle'], song['TrackTitle']]).fetchone()

            have_updated = False
            if track:
                controlValueDict = {'ArtistName': track['ArtistName'],
                                    'AlbumTitle': track['AlbumTitle'],
                                    'TrackTitle': track['TrackTitle']}
                newValueDict = {'Location': song['Location'],
                                'BitRate': song['BitRate'],
                                'Format': song['Format']}
                myDB.upsert("tracks", newValueDict, controlValueDict)
                controlValueDict2 = {'Location': song['Location']}
                newValueDict2 = {'Matched': track['AlbumID']}
                myDB.upsert("have", newValueDict2, controlValueDict2)
                have_updated = True
            else:
                track = myDB.action('SELECT CleanName, AlbumID from tracks WHERE CleanName LIKE ?',
                                    [song['CleanName']]).fetchone()
                if track:
                    controlValueDict = {'CleanName': track['CleanName']}
                    newValueDict = {'Location': song['Location'],
                                    'BitRate': song['BitRate'],
                                    'Format': song['Format']}
                    myDB.upsert("tracks", newValueDict, controlValueDict)
                    controlValueDict2 = {'Location': song['Location']}
                    newValueDict2 = {'Matched': track['AlbumID']}
                    myDB.upsert("have", newValueDict2, controlValueDict2)
                    have_updated = True
                else:
                    controlValueDict2 = {'Location': song['Location']}
                    newValueDict2 = {'Matched': "Failed"}
                    myDB.upsert("have", newValueDict2, controlValueDict2)
                    have_updated = True

            # Repeat the same two-step match against alltracks so duplicate
            # releases stay in sync with the matched file.
            alltrack = myDB.action(
                'SELECT ArtistName, AlbumTitle, TrackTitle, AlbumID from alltracks WHERE ArtistName LIKE ? AND AlbumTitle LIKE ? AND TrackTitle LIKE ?',
                [song['ArtistName'], song['AlbumTitle'], song['TrackTitle']]).fetchone()

            if alltrack:
                controlValueDict = {'ArtistName': alltrack['ArtistName'],
                                    'AlbumTitle': alltrack['AlbumTitle'],
                                    'TrackTitle': alltrack['TrackTitle']}
                newValueDict = {'Location': song['Location'],
                                'BitRate': song['BitRate'],
                                'Format': song['Format']}
                myDB.upsert("alltracks", newValueDict, controlValueDict)
                controlValueDict2 = {'Location': song['Location']}
                newValueDict2 = {'Matched': alltrack['AlbumID']}
                myDB.upsert("have", newValueDict2, controlValueDict2)
            else:
                alltrack = myDB.action(
                    'SELECT CleanName, AlbumID from alltracks WHERE CleanName LIKE ?',
                    [song['CleanName']]).fetchone()
                if alltrack:
                    controlValueDict = {'CleanName': alltrack['CleanName']}
                    newValueDict = {'Location': song['Location'],
                                    'BitRate': song['BitRate'],
                                    'Format': song['Format']}
                    myDB.upsert("alltracks", newValueDict, controlValueDict)
                    controlValueDict2 = {'Location': song['Location']}
                    newValueDict2 = {'Matched': alltrack['AlbumID']}
                    myDB.upsert("have", newValueDict2, controlValueDict2)
                else:
                    # alltracks may not exist if adding album manually, have should only be set to failed if not already updated in tracks
                    if not have_updated:
                        controlValueDict2 = {'Location': song['Location']}
                        newValueDict2 = {'Matched': "Failed"}
                        myDB.upsert("have", newValueDict2, controlValueDict2)
        else:
            # Missing artist/album/title metadata: cannot be matched at all.
            controlValueDict2 = {'Location': song['Location']}
            newValueDict2 = {'Matched': "Failed"}
            myDB.upsert("have", newValueDict2, controlValueDict2)

        # myDB.action('INSERT INTO have (ArtistName, AlbumTitle, TrackNumber, TrackTitle, TrackLength, BitRate, Genre, Date, TrackID, Location, CleanName, Format) VALUES( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', [song['ArtistName'], song['AlbumTitle'], song['TrackNumber'], song['TrackTitle'], song['TrackLength'], song['BitRate'], song['Genre'], song['Date'], song['TrackID'], song['Location'], CleanName, song['Format']])

    logger.info('Completed matching tracks from directory: %s' % dir.decode(headphones.SYS_ENCODING, 'replace'))

    if not append or artistScan:
        logger.info('Updating scanned artist track counts')

        # Clean up the new artist list (de-duplicate)
        unique_artists = {}.fromkeys(new_artists).keys()

        current_artists = myDB.select('SELECT ArtistName, ArtistID from artists')
        # There was a bug where artists with special characters (-,') would show up in new artists.
        artist_list = [
            x for x in unique_artists
            if helpers.clean_name(x).lower() not in [
                helpers.clean_name(y[0]).lower()
                for y in current_artists
            ]
        ]
        artists_checked = [
            x for x in unique_artists
            if helpers.clean_name(x).lower() in [
                helpers.clean_name(y[0]).lower()
                for y in current_artists
            ]
        ]

        # Update track counts
        for artist in artists_checked:
            # Have tracks are selected from tracks table and not all tracks because of duplicates
            # We update the track count upon an album switch to compliment this
            havetracks = (
                len(myDB.select(
                    'SELECT TrackTitle from tracks WHERE ArtistName like ? AND Location IS NOT NULL',
                    [artist])) + len(myDB.select(
                        'SELECT TrackTitle from have WHERE ArtistName like ? AND Matched = "Failed"',
                        [artist]))
            )
            # Note: some people complain about having "artist have tracks" > # of tracks total in artist official releases
            # (can fix by getting rid of second len statement)
            myDB.action('UPDATE artists SET HaveTracks=? WHERE ArtistName=?', [havetracks, artist])

        logger.info('Found %i new artists' % len(artist_list))

        if artist_list:
            if headphones.CONFIG.AUTO_ADD_ARTISTS:
                logger.info('Importing %i new artists' % len(artist_list))
                importer.artistlist_to_mbids(artist_list)
            else:
                logger.info('To add these artists, go to Manage->Manage New Artists')
                # myDB.action('DELETE from newartists')
                for artist in artist_list:
                    myDB.action('INSERT OR IGNORE INTO newartists VALUES (?)', [artist])

        if headphones.CONFIG.DETECT_BITRATE and bitrates:
            # Average scanned bitrate, converted from bps to kbps.
            headphones.CONFIG.PREFERRED_BITRATE = sum(bitrates) / len(bitrates) / 1000

    else:
        # If we're appending a new album to the database, update the artists total track counts
        logger.info('Updating artist track counts')

        havetracks = len(
            myDB.select('SELECT TrackTitle from tracks WHERE ArtistID=? AND Location IS NOT NULL',
                        [ArtistID])) + len(myDB.select(
                            'SELECT TrackTitle from have WHERE ArtistName like ? AND Matched = "Failed"',
                            [ArtistName]))
        myDB.action('UPDATE artists SET HaveTracks=? WHERE ArtistID=?', [havetracks, ArtistID])

    if not append:
        update_album_status()

    if not append and not artistScan:
        lastfm.getSimilar()

    logger.info('Library scan complete')
}) i += 1 release_dict['tracks'] = tracks releaselist.append(release_dict) average_tracks = sum(x['trackscount'] for x in releaselist) / float(len(releaselist)) for item in releaselist: item['trackscount_delta'] = abs(average_tracks - item['trackscount']) a = multikeysort( releaselist, ['-hasasin', 'country', 'format', 'trackscount_delta']) release_dict = { 'releaseid': a[0]['releaseid'], 'releasedate': releaselist[0]['releasedate'], 'trackcount': a[0]['trackscount'], 'tracks': a[0]['tracks'], 'asin': a[0]['asin'], 'releaselist': releaselist, 'artist_name': releaseGroup.artist.name, 'artist_id': u.extractUuid(releaseGroup.artist.id), 'title': releaseGroup.title, 'type': u.extractFragment(releaseGroup.type) }
def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None, cron=False):
    """Scan a music directory and sync it with the Headphones database.

    Walks `dir` (defaults to the configured MUSIC_DIR), records every media
    file in the `have` table, then matches new/modified files against the
    `tracks` and `alltracks` tables so library locations/bitrates stay current.

    Args:
        dir: Directory to scan. When `append` is True this is already a
            bytestring coming from the post processor; otherwise it is
            encoded with the system encoding here.
        append: True when appending a single new album from the post
            processor (skips the full cleanup passes and only refreshes the
            one artist's track counts).
        ArtistID, ArtistName: Used only in append mode to update that
            artist's have-track count.
        cron: When True, bail out unless the scheduled library scan is
            enabled in config.
    """
    if cron and not headphones.LIBRARYSCAN:
        return

    if not dir:
        if not headphones.MUSIC_DIR:
            return
        else:
            dir = headphones.MUSIC_DIR

    # If we're appending a dir, it's coming from the post processor which is
    # already bytestring
    if not append:
        dir = dir.encode(headphones.SYS_ENCODING)

    if not os.path.isdir(dir):
        logger.warn('Cannot find directory: %s. Not scanning' %
                    dir.decode(headphones.SYS_ENCODING, 'replace'))
        return

    myDB = db.DBConnection()
    new_artists = []

    logger.info('Scanning music directory: %s' %
                dir.decode(headphones.SYS_ENCODING, 'replace'))

    if not append:
        # Clean up bad filepaths: null out locations of tracks that no longer
        # exist on disk so they can be re-matched later.
        tracks = myDB.select(
            'SELECT Location from alltracks WHERE Location IS NOT NULL UNION SELECT Location from tracks WHERE Location IS NOT NULL'
        )

        for track in tracks:
            encoded_track_string = track['Location'].encode(headphones.SYS_ENCODING)
            if not os.path.isfile(encoded_track_string):
                myDB.action(
                    'UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE Location=?',
                    [None, None, None, track['Location']])
                myDB.action(
                    'UPDATE alltracks SET Location=?, BitRate=?, Format=? WHERE Location=?',
                    [None, None, None, track['Location']])

        del_have_tracks = myDB.select('SELECT Location, Matched, ArtistName from have')

        for track in del_have_tracks:
            encoded_track_string = track['Location'].encode(headphones.SYS_ENCODING, 'replace')
            if not os.path.isfile(encoded_track_string):
                if track['ArtistName']:
                    # Make sure deleted files get accounted for when updating
                    # artist track counts
                    new_artists.append(track['ArtistName'])
                myDB.action('DELETE FROM have WHERE Location=?', [track['Location']])
                logger.info(
                    'File %s removed from Headphones, as it is no longer on disk' %
                    encoded_track_string.decode(headphones.SYS_ENCODING, 'replace'))

    bitrates = []
    song_list = []
    new_song_count = 0
    file_count = 0
    latest_subdirectory = []

    for r, d, f in os.walk(dir):
        # Need to abuse slicing to get a copy of the list; removing from the
        # list we're iterating would skip the element after a deleted one.
        # A list comprehension will not work correctly for nested
        # subdirectories (os.walk keeps its original list).
        for directory in d[:]:
            if directory.startswith("."):
                d.remove(directory)
        for files in f:
            # MEDIA_FORMATS = music file extensions, e.g. mp3, flac, etc
            if any(files.lower().endswith('.' + x.lower()) for x in headphones.MEDIA_FORMATS):
                subdirectory = r.replace(dir, '')
                latest_subdirectory.append(subdirectory)

                # Log once per subdirectory change, so large scans show progress
                if file_count == 0 and r.replace(dir, '') != '':
                    logger.info("[%s] Now scanning subdirectory %s" %
                                (dir.decode(headphones.SYS_ENCODING, 'replace'),
                                 subdirectory.decode(headphones.SYS_ENCODING, 'replace')))
                elif latest_subdirectory[file_count] != latest_subdirectory[file_count - 1] and file_count != 0:
                    logger.info("[%s] Now scanning subdirectory %s" %
                                (dir.decode(headphones.SYS_ENCODING, 'replace'),
                                 subdirectory.decode(headphones.SYS_ENCODING, 'replace')))

                song = os.path.join(r, files)

                # We need the unicode path to use for logging, inserting into database
                unicode_song_path = song.decode(headphones.SYS_ENCODING, 'replace')

                # Try to read the metadata
                try:
                    f = MediaFile(song)
                except (FileTypeError, UnreadableFileError):
                    logger.error(
                        "Cannot read file media file '%s'. It may be corrupted or not a media file.",
                        unicode_song_path)
                    continue

                # Grab the bitrates for the auto detect bit rate option
                if f.bitrate:
                    bitrates.append(f.bitrate)

                # Use the album artist over the artist if available
                if f.albumartist:
                    f_artist = f.albumartist
                elif f.artist:
                    f_artist = f.artist
                else:
                    f_artist = None

                # Add the song to our song list -
                # TODO: skip adding songs without the minimum requisite
                # information (just a matter of putting together the right
                # if statements)
                if f_artist and f.album and f.title:
                    CleanName = helpers.cleanName(f_artist + ' ' + f.album + ' ' + f.title)
                else:
                    CleanName = None

                controlValueDict = {'Location': unicode_song_path}
                newValueDict = {
                    'TrackID': f.mb_trackid,
                    # 'ReleaseID': f.mb_albumid,
                    'ArtistName': f_artist,
                    'AlbumTitle': f.album,
                    'TrackNumber': f.track,
                    'TrackLength': f.length,
                    'Genre': f.genre,
                    'Date': f.date,
                    'TrackTitle': f.title,
                    'BitRate': f.bitrate,
                    'Format': f.format,
                    'CleanName': CleanName
                }

                check_exist_song = myDB.action(
                    "SELECT * FROM have WHERE Location=?", [unicode_song_path]).fetchone()
                # Only attempt to match songs that are new, haven't yet been
                # matched, or whose metadata has changed.
                if not check_exist_song:
                    # This is a new track
                    if f_artist:
                        new_artists.append(f_artist)
                    myDB.upsert("have", newValueDict, controlValueDict)
                    new_song_count += 1
                else:
                    if check_exist_song['ArtistName'] != f_artist or \
                            check_exist_song['AlbumTitle'] != f.album or \
                            check_exist_song['TrackTitle'] != f.title:
                        # Important track metadata has been modified, need to
                        # run matcher again
                        if f_artist and f_artist != check_exist_song['ArtistName']:
                            new_artists.append(f_artist)
                        elif f_artist and f_artist == check_exist_song['ArtistName'] \
                                and check_exist_song['Matched'] != "Ignored":
                            new_artists.append(f_artist)
                        else:
                            continue
                        newValueDict['Matched'] = None
                        myDB.upsert("have", newValueDict, controlValueDict)
                        myDB.action(
                            'UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE Location=?',
                            [None, None, None, unicode_song_path])
                        myDB.action(
                            'UPDATE alltracks SET Location=?, BitRate=?, Format=? WHERE Location=?',
                            [None, None, None, unicode_song_path])
                        new_song_count += 1
                    else:
                        # This track information hasn't changed
                        if f_artist and check_exist_song['Matched'] != "Ignored":
                            new_artists.append(f_artist)

                file_count += 1

    # Now we start track matching
    logger.info("%s new/modified songs found and added to the database" % new_song_count)

    song_list = myDB.action(
        "SELECT * FROM have WHERE Matched IS NULL AND LOCATION LIKE ?",
        [dir.decode(headphones.SYS_ENCODING, 'replace') + "%"])
    total_number_of_songs = myDB.action(
        "SELECT COUNT(*) FROM have WHERE Matched IS NULL AND LOCATION LIKE ?",
        [dir.decode(headphones.SYS_ENCODING, 'replace') + "%"]).fetchone()[0]

    logger.info("Found " + str(total_number_of_songs) + " new/modified tracks in: '" +
                dir.decode(headphones.SYS_ENCODING, 'replace') +
                "'. Matching tracks to the appropriate releases....")

    # Sort the song_list by most vague (e.g. no trackid or releaseid) to most
    # specific (both trackid & releaseid). When we insert into the database,
    # the tracks with the most specific information will overwrite the more
    # general matches.
    song_list = helpers.multikeysort(song_list, ['ArtistName', 'AlbumTitle'])

    # We'll use this to give a % completion, just because the track matching
    # might take a while
    song_count = 0
    latest_artist = []

    for song in song_list:
        latest_artist.append(song['ArtistName'])
        # Log once per artist change so progress is visible
        if song_count == 0:
            logger.info("Now matching songs by %s" % song['ArtistName'])
        elif latest_artist[song_count] != latest_artist[song_count - 1] and song_count != 0:
            logger.info("Now matching songs by %s" % song['ArtistName'])

        song_count += 1
        completion_percentage = float(song_count) / total_number_of_songs * 100

        if completion_percentage % 10 == 0:
            logger.info("Track matching is " + str(completion_percentage) + "% complete")

        # NOTE: the "more-specific" clauses (TrackID/ReleaseID based) were
        # removed. When running a library scan the only clauses that ever got
        # hit were [artist/album/track] or CleanName - ArtistID & ReleaseID
        # are never passed to this function. The other matching functions in
        # this program (importer.py, mb.py) do the same match, so it's all
        # consistent.
        if song['ArtistName'] and song['AlbumTitle'] and song['TrackTitle']:
            # First try an exact-ish match against the tracks table
            track = myDB.action(
                'SELECT ArtistName, AlbumTitle, TrackTitle, AlbumID from tracks WHERE ArtistName LIKE ? AND AlbumTitle LIKE ? AND TrackTitle LIKE ?',
                [song['ArtistName'], song['AlbumTitle'], song['TrackTitle']]).fetchone()

            have_updated = False
            if track:
                controlValueDict = {
                    'ArtistName': track['ArtistName'],
                    'AlbumTitle': track['AlbumTitle'],
                    'TrackTitle': track['TrackTitle']
                }
                newValueDict = {
                    'Location': song['Location'],
                    'BitRate': song['BitRate'],
                    'Format': song['Format']
                }
                myDB.upsert("tracks", newValueDict, controlValueDict)
                controlValueDict2 = {'Location': song['Location']}
                newValueDict2 = {'Matched': track['AlbumID']}
                myDB.upsert("have", newValueDict2, controlValueDict2)
                have_updated = True
            else:
                # Fall back to the normalized CleanName match
                track = myDB.action(
                    'SELECT CleanName, AlbumID from tracks WHERE CleanName LIKE ?',
                    [song['CleanName']]).fetchone()
                if track:
                    controlValueDict = {'CleanName': track['CleanName']}
                    newValueDict = {
                        'Location': song['Location'],
                        'BitRate': song['BitRate'],
                        'Format': song['Format']
                    }
                    myDB.upsert("tracks", newValueDict, controlValueDict)
                    controlValueDict2 = {'Location': song['Location']}
                    newValueDict2 = {'Matched': track['AlbumID']}
                    myDB.upsert("have", newValueDict2, controlValueDict2)
                    have_updated = True
                else:
                    controlValueDict2 = {'Location': song['Location']}
                    newValueDict2 = {'Matched': "Failed"}
                    myDB.upsert("have", newValueDict2, controlValueDict2)
                    have_updated = True

            # Repeat the same matching against the alltracks table
            alltrack = myDB.action(
                'SELECT ArtistName, AlbumTitle, TrackTitle, AlbumID from alltracks WHERE ArtistName LIKE ? AND AlbumTitle LIKE ? AND TrackTitle LIKE ?',
                [song['ArtistName'], song['AlbumTitle'], song['TrackTitle']]).fetchone()
            if alltrack:
                controlValueDict = {
                    'ArtistName': alltrack['ArtistName'],
                    'AlbumTitle': alltrack['AlbumTitle'],
                    'TrackTitle': alltrack['TrackTitle']
                }
                newValueDict = {
                    'Location': song['Location'],
                    'BitRate': song['BitRate'],
                    'Format': song['Format']
                }
                myDB.upsert("alltracks", newValueDict, controlValueDict)
                controlValueDict2 = {'Location': song['Location']}
                newValueDict2 = {'Matched': alltrack['AlbumID']}
                myDB.upsert("have", newValueDict2, controlValueDict2)
            else:
                alltrack = myDB.action(
                    'SELECT CleanName, AlbumID from alltracks WHERE CleanName LIKE ?',
                    [song['CleanName']]).fetchone()
                if alltrack:
                    controlValueDict = {'CleanName': alltrack['CleanName']}
                    newValueDict = {
                        'Location': song['Location'],
                        'BitRate': song['BitRate'],
                        'Format': song['Format']
                    }
                    myDB.upsert("alltracks", newValueDict, controlValueDict)
                    controlValueDict2 = {'Location': song['Location']}
                    newValueDict2 = {'Matched': alltrack['AlbumID']}
                    myDB.upsert("have", newValueDict2, controlValueDict2)
                else:
                    # alltracks may not exist if adding album manually; have
                    # should only be set to failed if not already updated in
                    # tracks
                    if not have_updated:
                        controlValueDict2 = {'Location': song['Location']}
                        newValueDict2 = {'Matched': "Failed"}
                        myDB.upsert("have", newValueDict2, controlValueDict2)
        else:
            # Not enough metadata to attempt a match
            controlValueDict2 = {'Location': song['Location']}
            newValueDict2 = {'Matched': "Failed"}
            myDB.upsert("have", newValueDict2, controlValueDict2)

    logger.info('Completed matching tracks from directory: %s' %
                dir.decode(headphones.SYS_ENCODING, 'replace'))

    if not append:
        logger.info('Updating scanned artist track counts')

        # Clean up the new artist list (dedupe)
        unique_artists = {}.fromkeys(new_artists).keys()
        current_artists = myDB.select('SELECT ArtistName, ArtistID from artists')

        # There was a bug where artists with special characters (-,') would
        # show up in new artists - compare on cleaned names instead.
        artist_list = [
            f for f in unique_artists
            if helpers.cleanName(f).lower() not in
            [helpers.cleanName(x[0]).lower() for x in current_artists]
        ]
        artists_checked = [
            f for f in unique_artists
            if helpers.cleanName(f).lower() in
            [helpers.cleanName(x[0]).lower() for x in current_artists]
        ]

        # Update track counts
        for artist in artists_checked:
            # Have tracks are selected from tracks table and not alltracks
            # because of duplicates. We update the track count upon an album
            # switch to compliment this.
            havetracks = len(
                myDB.select(
                    'SELECT TrackTitle from tracks WHERE ArtistName like ? AND Location IS NOT NULL',
                    [artist])
            ) + len(
                myDB.select(
                    'SELECT TrackTitle from have WHERE ArtistName like ? AND Matched = "Failed"',
                    [artist]))
            # Note: some people complain about having "artist have tracks" >
            # number of tracks total in the artist's official releases (can
            # fix by getting rid of the second len statement).
            myDB.action('UPDATE artists SET HaveTracks=? WHERE ArtistName=?',
                        [havetracks, artist])

        logger.info('Found %i new artists' % len(artist_list))

        if len(artist_list):
            if headphones.ADD_ARTISTS:
                logger.info('Importing %i new artists' % len(artist_list))
                importer.artistlist_to_mbids(artist_list)
            else:
                logger.info('To add these artists, go to Manage->Manage New Artists')
                for artist in artist_list:
                    myDB.action('INSERT OR IGNORE INTO newartists VALUES (?)', [artist])

        # FIX: guard against an empty bitrate list (e.g. no readable media
        # files found) which previously raised ZeroDivisionError.
        if headphones.DETECT_BITRATE and bitrates:
            headphones.PREFERRED_BITRATE = sum(bitrates) / len(bitrates) / 1000

    else:
        # If we're appending a new album to the database, update the artist's
        # total track count only.
        logger.info('Updating artist track counts')

        havetracks = len(
            myDB.select(
                'SELECT TrackTitle from tracks WHERE ArtistID=? AND Location IS NOT NULL',
                [ArtistID])
        ) + len(
            myDB.select(
                'SELECT TrackTitle from have WHERE ArtistName like ? AND Matched = "Failed"',
                [ArtistName]))
        myDB.action('UPDATE artists SET HaveTracks=? WHERE ArtistID=?',
                    [havetracks, ArtistID])

    if not append:
        update_album_status()
        lastfm.getSimilar()

    logger.info('Library scan complete')
def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None):
    """Scan a music directory and match found files against known releases.

    Older scan variant: rebuilds the `have` table from scratch (unless
    appending) and matches each file to `tracks`/`alltracks` rows by
    progressively less specific criteria: TrackID+ReleaseID, then
    ReleaseID+TrackTitle, TrackID+AlbumTitle, Artist+Album+Title, CleanName,
    and finally TrackID alone.

    Args:
        dir: Directory to scan; defaults to the configured MUSIC_DIR. When
            `append` is True it is already a bytestring from the post
            processor, otherwise it is encoded here.
        append: True when appending a single new album from the post
            processor.
        ArtistID, ArtistName: Used only in append mode to refresh that
            artist's have-track count.
    """
    if not dir:
        # FIX: previously `dir` stayed None when MUSIC_DIR was unset, which
        # crashed below with AttributeError on dir.encode().
        if not headphones.MUSIC_DIR:
            return
        else:
            dir = headphones.MUSIC_DIR

    # If we're appending a dir, it's coming from the post processor which is
    # already bytestring
    if not append:
        dir = dir.encode(headphones.SYS_ENCODING)

    if not os.path.isdir(dir):
        logger.warn('Cannot find directory: %s. Not scanning' %
                    dir.decode(headphones.SYS_ENCODING, 'replace'))
        return

    myDB = db.DBConnection()

    if not append:
        # Clean up bad filepaths: null out track locations that no longer
        # exist on disk, then rebuild the have table from scratch.
        tracks = myDB.select('SELECT Location, TrackID from tracks WHERE Location IS NOT NULL')

        for track in tracks:
            if not os.path.isfile(track['Location'].encode(headphones.SYS_ENCODING)):
                myDB.action(
                    'UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE TrackID=?',
                    [None, None, None, track['TrackID']])

        myDB.action('DELETE from have')

    logger.info('Scanning music directory: %s' %
                dir.decode(headphones.SYS_ENCODING, 'replace'))

    new_artists = []
    bitrates = []
    song_list = []

    for r, d, f in os.walk(dir):
        for files in f:
            # MEDIA_FORMATS = music file extensions, e.g. mp3, flac, etc
            if any(files.lower().endswith('.' + x.lower()) for x in headphones.MEDIA_FORMATS):
                song = os.path.join(r, files)

                # We need the unicode path to use for logging, inserting into database
                unicode_song_path = song.decode(headphones.SYS_ENCODING, 'replace')

                # Try to read the metadata.
                # FIX: was a bare `except:` which also swallowed
                # KeyboardInterrupt/SystemExit.
                try:
                    f = MediaFile(song)
                except Exception:
                    logger.error('Cannot read file: ' + unicode_song_path)
                    continue

                # Grab the bitrates for the auto detect bit rate option
                if f.bitrate:
                    bitrates.append(f.bitrate)

                # Use the album artist over the artist if available
                if f.albumartist:
                    f_artist = f.albumartist
                elif f.artist:
                    f_artist = f.artist
                else:
                    f_artist = None

                # Add the song to our song list -
                # TODO: skip adding songs without the minimum requisite
                # information (just a matter of putting together the right
                # if statements)
                song_dict = {
                    'TrackID': f.mb_trackid,
                    'ReleaseID': f.mb_albumid,
                    'ArtistName': f_artist,
                    'AlbumTitle': f.album,
                    'TrackNumber': f.track,
                    'TrackLength': f.length,
                    'Genre': f.genre,
                    'Date': f.date,
                    'TrackTitle': f.title,
                    'BitRate': f.bitrate,
                    'Format': f.format,
                    'Location': unicode_song_path
                }
                song_list.append(song_dict)

    # Now we start track matching
    total_number_of_songs = len(song_list)
    logger.info("Found " + str(total_number_of_songs) + " tracks in: '" +
                dir.decode(headphones.SYS_ENCODING, 'replace') +
                "'. Matching tracks to the appropriate releases....")

    # Sort the song_list by most vague (e.g. no trackid or releaseid) to most
    # specific (both trackid & releaseid). When we insert into the database,
    # the tracks with the most specific information will overwrite the more
    # general matches.
    song_list = helpers.multikeysort(song_list, ['ReleaseID', 'TrackID'])

    # We'll use this to give a % completion, just because the track matching
    # might take a while
    song_count = 0

    for song in song_list:
        song_count += 1
        completion_percentage = float(song_count) / total_number_of_songs * 100

        if completion_percentage % 10 == 0:
            logger.info("Track matching is " + str(completion_percentage) + "% complete")

        # If the track has a trackid & releaseid (beets: albumid) that's the
        # most surefire way of identifying a track to a specific release so
        # we'll use that first
        if song['TrackID'] and song['ReleaseID']:
            # Check both the tracks table & alltracks table in case they
            # haven't populated the alltracks table yet
            track = myDB.action(
                'SELECT TrackID, ReleaseID, AlbumID from alltracks WHERE TrackID=? AND ReleaseID=?',
                [song['TrackID'], song['ReleaseID']]).fetchone()

            # It might be the case that the alltracks table isn't populated
            # yet, so maybe we can only find a match in the tracks table
            if not track:
                track = myDB.action(
                    'SELECT TrackID, ReleaseID, AlbumID from tracks WHERE TrackID=? AND ReleaseID=?',
                    [song['TrackID'], song['ReleaseID']]).fetchone()

            if track:
                # Use TrackID & ReleaseID here since there can only be one
                # possible match with a TrackID & ReleaseID query combo
                controlValueDict = {
                    'TrackID': track['TrackID'],
                    'ReleaseID': track['ReleaseID']
                }
                # Insert it into the Headphones hybrid release
                # (ReleaseID == AlbumID)
                hybridControlValueDict = {
                    'TrackID': track['TrackID'],
                    'ReleaseID': track['AlbumID']
                }
                newValueDict = {
                    'Location': song['Location'],
                    'BitRate': song['BitRate'],
                    'Format': song['Format']
                }
                # Update both the tracks table and the alltracks table using
                # the controlValueDict and hybridControlValueDict
                myDB.upsert("alltracks", newValueDict, controlValueDict)
                myDB.upsert("tracks", newValueDict, controlValueDict)
                myDB.upsert("alltracks", newValueDict, hybridControlValueDict)
                myDB.upsert("tracks", newValueDict, hybridControlValueDict)
                # Matched. Move on to the next one:
                continue

        # If we can't find it with TrackID & ReleaseID, next most specific
        # will be releaseid + tracktitle, although perhaps less reliable due
        # to a higher likelihood of variations in the song title
        # (e.g. feat. artists)
        if song['ReleaseID'] and song['TrackTitle']:
            track = myDB.action(
                'SELECT TrackID, ReleaseID, AlbumID from alltracks WHERE ReleaseID=? AND TrackTitle=?',
                [song['ReleaseID'], song['TrackTitle']]).fetchone()

            if not track:
                track = myDB.action(
                    'SELECT TrackID, ReleaseID, AlbumID from tracks WHERE ReleaseID=? AND TrackTitle=?',
                    [song['ReleaseID'], song['TrackTitle']]).fetchone()

            if track:
                # There can also only be one match for this query as well
                # (although it might be on both the tracks and alltracks
                # table). So use both TrackID & ReleaseID as the control
                # values
                controlValueDict = {
                    'TrackID': track['TrackID'],
                    'ReleaseID': track['ReleaseID']
                }
                hybridControlValueDict = {
                    'TrackID': track['TrackID'],
                    'ReleaseID': track['AlbumID']
                }
                newValueDict = {
                    'Location': song['Location'],
                    'BitRate': song['BitRate'],
                    'Format': song['Format']
                }
                # Update both tables here as well
                myDB.upsert("alltracks", newValueDict, controlValueDict)
                myDB.upsert("tracks", newValueDict, controlValueDict)
                myDB.upsert("alltracks", newValueDict, hybridControlValueDict)
                myDB.upsert("tracks", newValueDict, hybridControlValueDict)
                # Done
                continue

        # Next most specific will be the opposite: a TrackID and an
        # AlbumTitle. TrackIDs span multiple releases so if something is on
        # an official album and a compilation, for example, this will match
        # it to the right one. However - there may be multiple matches here.
        if song['TrackID'] and song['AlbumTitle']:
            # Even though there might be multiple matches, we just need to
            # grab one to confirm a match
            track = myDB.action(
                'SELECT TrackID, AlbumTitle from alltracks WHERE TrackID=? AND AlbumTitle LIKE ?',
                [song['TrackID'], song['AlbumTitle']]).fetchone()

            if not track:
                track = myDB.action(
                    'SELECT TrackID, AlbumTitle from tracks WHERE TrackID=? AND AlbumTitle LIKE ?',
                    [song['TrackID'], song['AlbumTitle']]).fetchone()

            if track:
                # Don't need the hybridControlValueDict here since ReleaseID
                # is not unique
                controlValueDict = {
                    'TrackID': track['TrackID'],
                    'AlbumTitle': track['AlbumTitle']
                }
                newValueDict = {
                    'Location': song['Location'],
                    'BitRate': song['BitRate'],
                    'Format': song['Format']
                }
                myDB.upsert("alltracks", newValueDict, controlValueDict)
                myDB.upsert("tracks", newValueDict, controlValueDict)
                continue

        # Next most specific is the ArtistName + AlbumTitle + TrackTitle
        # combo (but probably even more unreliable than the previous queries,
        # and might span multiple releases)
        if song['ArtistName'] and song['AlbumTitle'] and song['TrackTitle']:
            track = myDB.action(
                'SELECT ArtistName, AlbumTitle, TrackTitle from alltracks WHERE ArtistName LIKE ? AND AlbumTitle LIKE ? AND TrackTitle LIKE ?',
                [song['ArtistName'], song['AlbumTitle'], song['TrackTitle']]).fetchone()

            if not track:
                track = myDB.action(
                    'SELECT ArtistName, AlbumTitle, TrackTitle from tracks WHERE ArtistName LIKE ? AND AlbumTitle LIKE ? AND TrackTitle LIKE ?',
                    [song['ArtistName'], song['AlbumTitle'], song['TrackTitle']]).fetchone()

            if track:
                controlValueDict = {
                    'ArtistName': track['ArtistName'],
                    'AlbumTitle': track['AlbumTitle'],
                    'TrackTitle': track['TrackTitle']
                }
                newValueDict = {
                    'Location': song['Location'],
                    'BitRate': song['BitRate'],
                    'Format': song['Format']
                }
                myDB.upsert("alltracks", newValueDict, controlValueDict)
                myDB.upsert("tracks", newValueDict, controlValueDict)
                continue

        # Use the "CleanName" (ArtistName + AlbumTitle + TrackTitle stripped
        # of punctuation, capitalization, etc). This is more reliable than
        # the former but requires some string manipulation so we'll do it
        # only if we can't find a match with the original data
        if song['ArtistName'] and song['AlbumTitle'] and song['TrackTitle']:
            CleanName = helpers.cleanName(song['ArtistName'] + ' ' + song['AlbumTitle'] + ' ' + song['TrackTitle'])

            track = myDB.action(
                'SELECT CleanName from alltracks WHERE CleanName LIKE ?', [CleanName]).fetchone()

            if not track:
                track = myDB.action(
                    'SELECT CleanName from tracks WHERE CleanName LIKE ?', [CleanName]).fetchone()

            if track:
                controlValueDict = {'CleanName': track['CleanName']}
                newValueDict = {
                    'Location': song['Location'],
                    'BitRate': song['BitRate'],
                    'Format': song['Format']
                }
                myDB.upsert("alltracks", newValueDict, controlValueDict)
                myDB.upsert("tracks", newValueDict, controlValueDict)
                continue

        # Match on TrackID alone if we can't find it using any of the above
        # methods. This method is reliable but spans multiple releases -
        # that's why it's the last resort. If a track with more specific
        # information exists in the library, it'll overwrite these values
        if song['TrackID']:
            track = myDB.action(
                'SELECT TrackID from alltracks WHERE TrackID=?', [song['TrackID']]).fetchone()

            if not track:
                track = myDB.action(
                    'SELECT TrackID from tracks WHERE TrackID=?', [song['TrackID']]).fetchone()

            if track:
                controlValueDict = {'TrackID': track['TrackID']}
                newValueDict = {
                    'Location': song['Location'],
                    'BitRate': song['BitRate'],
                    'Format': song['Format']
                }
                myDB.upsert("alltracks", newValueDict, controlValueDict)
                myDB.upsert("tracks", newValueDict, controlValueDict)
                continue

        # If we can't find a match in the database on a track level, it might
        # be a new artist or it might be on a non-mb release
        if song['ArtistName']:
            new_artists.append(song['ArtistName'])
        else:
            continue

        # The have table will become the new database for unmatched tracks
        # (i.e. tracks with no associated links in the database)
        if song['ArtistName'] and song['AlbumTitle'] and song['TrackTitle']:
            CleanName = helpers.cleanName(song['ArtistName'] + ' ' + song['AlbumTitle'] + ' ' + song['TrackTitle'])
        else:
            continue

        myDB.action(
            'INSERT INTO have (ArtistName, AlbumTitle, TrackNumber, TrackTitle, TrackLength, BitRate, Genre, Date, TrackID, Location, CleanName, Format) VALUES( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
            [song['ArtistName'], song['AlbumTitle'], song['TrackNumber'],
             song['TrackTitle'], song['TrackLength'], song['BitRate'],
             song['Genre'], song['Date'], song['TrackID'], song['Location'],
             CleanName, song['Format']])

    logger.info('Completed matching tracks from directory: %s' %
                dir.decode(headphones.SYS_ENCODING, 'replace'))

    if not append:
        # Clean up the new artist list (dedupe)
        unique_artists = {}.fromkeys(new_artists).keys()
        current_artists = myDB.select('SELECT ArtistName, ArtistID from artists')

        artist_list = [
            f for f in unique_artists
            if f.lower() not in [x[0].lower() for x in current_artists]
        ]

        # Update track counts
        logger.info('Updating current artist track counts')

        for artist in current_artists:
            # Have tracks are selected from tracks table and not alltracks
            # because of duplicates. We update the track count upon an album
            # switch to compliment this.
            havetracks = len(
                myDB.select(
                    'SELECT TrackTitle from tracks WHERE ArtistID=? AND Location IS NOT NULL',
                    [artist['ArtistID']])
            ) + len(
                myDB.select(
                    'SELECT TrackTitle from have WHERE ArtistName like ?',
                    [artist['ArtistName']]))
            myDB.action('UPDATE artists SET HaveTracks=? WHERE ArtistID=?',
                        [havetracks, artist['ArtistID']])

        logger.info('Found %i new artists' % len(artist_list))

        if len(artist_list):
            if headphones.ADD_ARTISTS:
                logger.info('Importing %i new artists' % len(artist_list))
                importer.artistlist_to_mbids(artist_list)
            else:
                logger.info('To add these artists, go to Manage->Manage New Artists')
                myDB.action('DELETE from newartists')
                for artist in artist_list:
                    myDB.action('INSERT into newartists VALUES (?)', [artist])

        # FIX: guard against an empty bitrate list (e.g. no readable media
        # files found) which previously raised ZeroDivisionError.
        if headphones.DETECT_BITRATE and bitrates:
            headphones.PREFERRED_BITRATE = sum(bitrates) / len(bitrates) / 1000

    else:
        # If we're appending a new album to the database, update the artist's
        # total track count only.
        logger.info('Updating artist track counts')

        havetracks = len(
            myDB.select(
                'SELECT TrackTitle from tracks WHERE ArtistID=? AND Location IS NOT NULL',
                [ArtistID])
        ) + len(
            myDB.select(
                'SELECT TrackTitle from have WHERE ArtistName like ?',
                [ArtistName]))
        myDB.action('UPDATE artists SET HaveTracks=? WHERE ArtistID=?',
                    [havetracks, ArtistID])
"url": track.id, "duration": track.duration, } ) i += 1 release_dict["tracks"] = tracks releaselist.append(release_dict) average_tracks = sum(x["trackscount"] for x in releaselist) / float(len(releaselist)) for item in releaselist: item["trackscount_delta"] = abs(average_tracks - item["trackscount"]) a = multikeysort(releaselist, ["-hasasin", "country", "format", "trackscount_delta"]) release_dict = { "releaseid": a[0]["releaseid"], "releasedate": releaselist[0]["releasedate"], "trackcount": a[0]["trackscount"], "tracks": a[0]["tracks"], "asin": a[0]["asin"], "releaselist": releaselist, "artist_name": releaseGroup.artist.name, "artist_id": u.extractUuid(releaseGroup.artist.id), "title": releaseGroup.title, "type": u.extractFragment(releaseGroup.type), } return release_dict