def libraryScan(self, dir=None):
    """Sync the tracks/have tables with the music directory on disk.

    dir -- directory to scan; falls back to headphones.MUSIC_DIR when empty.

    Returns None. Logs a warning and returns early when the directory does
    not exist. Otherwise it clears stale track locations, empties the `have`
    table, runs scrapeScan(dir) and fileSystemScan(), refreshes every
    artist's HaveTracks count and imports any new artists.
    """
    if not dir:
        dir = headphones.MUSIC_DIR

    # Coerce to a byte string; fall back to an escaped form for unicode
    # paths that cannot be encoded with the default codec.
    try:
        dir = str(dir)
    except UnicodeEncodeError:
        dir = unicode(dir).encode('unicode_escape')

    if not os.path.isdir(dir):
        logger.warn('Cannot find directory: %s. Not scanning' % dir)
        return

    myDB = db.DBConnection()

    # Clean up bad filepaths
    tracks = myDB.select('SELECT Location, TrackID from tracks WHERE Location IS NOT NULL')

    for track in tracks:
        if not os.path.isfile(track['Location'].encode(headphones.SYS_ENCODING)):
            myDB.action('UPDATE tracks SET Location=?, BitRate=? WHERE TrackID=?',
                        [None, None, track['TrackID']])

    logger.info('Scanning music directory: %s' % dir)

    # NOTE(review): nothing in this function appends to these two lists;
    # presumably an earlier revision filled them inline - confirm whether
    # scrapeScan is expected to report artists/bitrates back.
    new_artists = []
    bitrates = []

    myDB.action('DELETE from have')

    scrapeScan(dir)

    logger.info('Completed scanning of directory: %s' % dir)
    logger.info('Checking filepaths to see if we can find any matches')

    fileSystemScan()

    logger.info('Done checking empty filepaths')
    logger.info('Done syncing library with directory: %s' % dir)

    # Clean up the new artist list
    unique_artists = {}.fromkeys(new_artists).keys()
    current_artists = myDB.select('SELECT ArtistName, ArtistID from artists')

    # Build the lower-cased name set once instead of once per candidate, and
    # skip rows whose ArtistName is NULL (they would crash on .lower()).
    known_artists = set(x[0].lower() for x in current_artists if x[0])
    artist_list = [f for f in unique_artists if f.lower() not in known_artists]

    # Update track counts
    logger.info('Updating track counts')

    for artist in current_artists:
        havetracks = len(myDB.select(
            'SELECT TrackTitle from tracks WHERE ArtistID like ? AND Location IS NOT NULL',
            [artist['ArtistID']])) + len(myDB.select(
                'SELECT TrackTitle from have WHERE ArtistName like ?',
                [artist['ArtistName']]))
        myDB.action('UPDATE artists SET HaveTracks=? WHERE ArtistID=?',
                    [havetracks, artist['ArtistID']])

    logger.info('Found %i new artists' % len(artist_list))

    if len(artist_list):
        logger.info('Importing %i new artists' % len(artist_list))
        importer.artistlist_to_mbids(artist_list)

    # Only average when at least one bitrate was collected; an empty list
    # would otherwise raise ZeroDivisionError.
    if headphones.DETECT_BITRATE and bitrates:
        headphones.PREFERRED_BITRATE = sum(bitrates)/len(bitrates)/1000
def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None,
                cron=False, artistScan=False):
    """Scan a music directory, record its files in `have` and match them.

    dir        -- directory to scan; defaults to headphones.CONFIG.MUSIC_DIR.
                  Nothing is done when neither is set.
    append     -- when true, `dir` is used as given (not re-encoded, unless
                  artistScan is set), the stale-path cleanup is skipped and
                  only the HaveTracks count of ArtistID is refreshed.
    ArtistID   -- limits the tracks/alltracks cleanup to one artist; also the
                  artist whose count is updated in append mode.
    ArtistName -- limits the `have` cleanup to one artist; used for the
                  `have` count in append mode and for the final log line.
    cron       -- when true, the scan only runs if
                  headphones.CONFIG.LIBRARYSCAN is enabled.
    artistScan -- when true, `dir` is encoded and per-artist counts are
                  updated even in append mode, and lastfm.getSimilar() is
                  not called.

    Returns None. Problems reading individual files are logged and skipped.
    """
    if cron and not headphones.CONFIG.LIBRARYSCAN:
        return

    if not dir:
        if not headphones.CONFIG.MUSIC_DIR:
            return
        else:
            dir = headphones.CONFIG.MUSIC_DIR

    # If we're appending a dir, it's coming from the post processor which is
    # already bytestring
    if not append or artistScan:
        dir = dir.encode(headphones.SYS_ENCODING)

    if not os.path.isdir(dir):
        logger.warn('Cannot find directory: %s. Not scanning' % dir.decode(
            headphones.SYS_ENCODING, 'replace'))
        return

    myDB = db.DBConnection()
    new_artists = []

    logger.info('Scanning music directory: %s' % dir.decode(headphones.SYS_ENCODING, 'replace'))

    if not append:

        # Clean up bad filepaths. Queries can take some time, ensure all results are loaded before processing
        if ArtistID:
            tracks = myDB.action(
                'SELECT Location FROM alltracks WHERE ArtistID = ? AND Location IS NOT NULL UNION SELECT Location FROM tracks WHERE ArtistID = ? AND Location '
                'IS NOT NULL', [ArtistID, ArtistID])
        else:
            tracks = myDB.action(
                'SELECT Location FROM alltracks WHERE Location IS NOT NULL UNION SELECT Location FROM tracks WHERE Location IS NOT NULL')

        locations = []
        for track in tracks:
            locations.append(track['Location'])
        for location in locations:
            encoded_track_string = location.encode(headphones.SYS_ENCODING, 'replace')
            if not os.path.isfile(encoded_track_string):
                myDB.action(
                    'UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE Location=?',
                    [None, None, None, location])
                myDB.action(
                    'UPDATE alltracks SET Location=?, BitRate=?, Format=? WHERE Location=?',
                    [None, None, None, location])

        if ArtistName:
            del_have_tracks = myDB.select(
                'SELECT Location, Matched, ArtistName FROM have WHERE ArtistName = ? COLLATE NOCASE',
                [ArtistName])
        else:
            del_have_tracks = myDB.select(
                'SELECT Location, Matched, ArtistName FROM have')

        locations = []
        for track in del_have_tracks:
            locations.append([track['Location'], track['ArtistName']])
        for location in locations:
            encoded_track_string = location[0].encode(headphones.SYS_ENCODING, 'replace')
            if not os.path.isfile(encoded_track_string):
                if location[1]:
                    # Make sure deleted files get accounted for when updating artist track counts
                    new_artists.append(location[1])
                myDB.action('DELETE FROM have WHERE Location=?', [location[0]])
                logger.info(
                    'File %s removed from Headphones, as it is no longer on disk' %
                    encoded_track_string.decode(headphones.SYS_ENCODING, 'replace'))

    bitrates = []
    new_song_count = 0

    # Subdirectory of the previous media file, used to log each directory
    # change once. A single variable (rather than a list indexed by a file
    # counter) stays correct when a file is skipped with `continue`.
    previous_subdirectory = None

    for r, d, f in helpers.walk_directory(dir):
        # Filter paths based on config. Note that these methods work directly
        # on the inputs
        helpers.path_filter_patterns(d, headphones.CONFIG.IGNORED_FOLDERS, r)
        helpers.path_filter_patterns(f, headphones.CONFIG.IGNORED_FILES, r)

        for files in f:
            # MEDIA_FORMATS = music file extensions, e.g. mp3, flac, etc
            if any(files.lower().endswith('.' + x.lower()) for x in headphones.MEDIA_FORMATS):

                subdirectory = r.replace(dir, '')
                if previous_subdirectory is None:
                    # First media file: the root directory itself is not announced
                    announce = subdirectory != ''
                else:
                    announce = subdirectory != previous_subdirectory
                previous_subdirectory = subdirectory

                if announce:
                    logger.info(
                        "[%s] Now scanning subdirectory %s" %
                        (dir.decode(headphones.SYS_ENCODING, 'replace'),
                         subdirectory.decode(headphones.SYS_ENCODING, 'replace')))

                song = os.path.join(r, files)

                # We need the unicode path to use for logging, inserting into database
                unicode_song_path = song.decode(headphones.SYS_ENCODING, 'replace')

                # Try to read the metadata
                try:
                    media = MediaFile(song)
                except (FileTypeError, UnreadableFileError):
                    logger.warning(
                        "Cannot read media file '%s', skipping. It may be corrupted or not a media file.",
                        unicode_song_path)
                    continue
                except IOError:
                    logger.warning(
                        "Cannnot read media file '%s', skipping. Does the file exists?",
                        unicode_song_path)
                    continue

                # Grab the bitrates for the auto detect bit rate option
                if media.bitrate:
                    bitrates.append(media.bitrate)

                # Use the album artist over the artist if available
                if media.albumartist:
                    f_artist = media.albumartist
                elif media.artist:
                    f_artist = media.artist
                else:
                    f_artist = None

                # Add the song to our song list -
                # TODO: skip adding songs without the minimum requisite information (just a matter of putting together the right if statements)

                if f_artist and media.album and media.title:
                    CleanName = helpers.clean_name(f_artist + ' ' + media.album + ' ' + media.title)
                else:
                    CleanName = None

                controlValueDict = {'Location': unicode_song_path}

                newValueDict = {
                    'TrackID': media.mb_trackid,
                    'ArtistName': f_artist,
                    'AlbumTitle': media.album,
                    'TrackNumber': media.track,
                    'TrackLength': media.length,
                    'Genre': media.genre,
                    'Date': media.date,
                    'TrackTitle': media.title,
                    'BitRate': media.bitrate,
                    'Format': media.format,
                    'CleanName': CleanName
                }

                check_exist_song = myDB.action(
                    "SELECT * FROM have WHERE Location=?",
                    [unicode_song_path]).fetchone()

                # Only attempt to match songs that are new, haven't yet been matched, or metadata has changed.
                if not check_exist_song:
                    # This is a new track
                    if f_artist:
                        new_artists.append(f_artist)
                    myDB.upsert("have", newValueDict, controlValueDict)
                    new_song_count += 1
                else:
                    if check_exist_song['ArtistName'] != f_artist or \
                            check_exist_song['AlbumTitle'] != media.album or \
                            check_exist_song['TrackTitle'] != media.title:
                        # Important track metadata has been modified, need to run matcher again
                        if f_artist and f_artist != check_exist_song['ArtistName']:
                            new_artists.append(f_artist)
                        elif f_artist and f_artist == check_exist_song['ArtistName'] and \
                                check_exist_song['Matched'] != "Ignored":
                            new_artists.append(f_artist)
                        else:
                            continue

                        # Reset the match so the matcher below picks it up again
                        newValueDict['Matched'] = None
                        myDB.upsert("have", newValueDict, controlValueDict)
                        myDB.action(
                            'UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE Location=?',
                            [None, None, None, unicode_song_path])
                        myDB.action(
                            'UPDATE alltracks SET Location=?, BitRate=?, Format=? WHERE Location=?',
                            [None, None, None, unicode_song_path])
                        new_song_count += 1
                    else:
                        # This track information hasn't changed
                        if f_artist and check_exist_song['Matched'] != "Ignored":
                            new_artists.append(f_artist)

    # Now we start track matching
    logger.info("%s new/modified songs found and added to the database" % new_song_count)
    song_list = myDB.action(
        "SELECT * FROM have WHERE Matched IS NULL AND LOCATION LIKE ?",
        [dir.decode(headphones.SYS_ENCODING, 'replace') + "%"])
    total_number_of_songs = \
        myDB.action("SELECT COUNT(*) FROM have WHERE Matched IS NULL AND LOCATION LIKE ?",
                    [dir.decode(headphones.SYS_ENCODING, 'replace') + "%"]).fetchone()[0]
    logger.info("Found " + str(total_number_of_songs) + " new/modified tracks in: '" +
                dir.decode(headphones.SYS_ENCODING, 'replace') +
                "'. Matching tracks to the appropriate releases....")

    # Sorting by artist/album lets the artist lookup below be reused for
    # consecutive songs by the same artist.
    song_list = helpers.multikeysort(song_list, ['ArtistName', 'AlbumTitle'])

    # We'll use this to give a % completion, just because the track matching might take a while
    song_count = 0
    latest_artist = []
    last_completion_percentage = 0
    prev_artist_name = None
    artistid = None

    for song in song_list:

        latest_artist.append(song['ArtistName'])
        if song_count == 0:
            logger.info("Now matching songs by %s" % song['ArtistName'])
        elif latest_artist[song_count] != latest_artist[song_count - 1] and song_count != 0:
            logger.info("Now matching songs by %s" % song['ArtistName'])

        song_count += 1
        completion_percentage = math.floor(
            float(song_count) / total_number_of_songs * 1000) / 10

        if completion_percentage >= (last_completion_percentage + 10):
            logger.info("Track matching is " + str(completion_percentage) + "% complete")
            last_completion_percentage = completion_percentage

        # THE "MORE-SPECIFIC" CLAUSES HERE HAVE ALL BEEN REMOVED. WHEN RUNNING A LIBRARY SCAN, THE ONLY CLAUSES THAT
        # EVER GOT HIT WERE [ARTIST/ALBUM/TRACK] OR CLEANNAME. ARTISTID & RELEASEID ARE NEVER PASSED TO THIS FUNCTION,
        # ARE NEVER FOUND, AND THE OTHER CLAUSES WERE NEVER HIT. FURTHERMORE, OTHER MATCHING FUNCTIONS IN THIS PROGRAM
        # (IMPORTER.PY, MB.PY) SIMPLY DO A [ARTIST/ALBUM/TRACK] OR CLEANNAME MATCH, SO IT'S ALL CONSISTENT.

        albumid = None

        if song['ArtistName'] and song['CleanName']:
            artist_name = song['ArtistName']
            clean_name = song['CleanName']

            # Only update if artist is in the db
            if artist_name != prev_artist_name:
                prev_artist_name = artist_name
                artistid = None

                # The artist name is embedded in the SQL text (with embedded
                # double quotes doubled) rather than bound as a parameter.
                # NOTE(review): the comment further down says LIKE ? was not
                # using the index, which is presumably why this is not
                # parameterized - confirm before changing.
                artist_lookup = "\"" + artist_name.replace("\"", "\"\"") + "\""

                try:
                    dbartist = myDB.select(
                        'SELECT DISTINCT ArtistID, ArtistName FROM artists WHERE ArtistName LIKE ' + artist_lookup + '')
                except Exception:
                    dbartist = None

                # Fall back step by step; each step only runs while nothing
                # has been found yet.
                if not dbartist:
                    dbartist = myDB.select(
                        'SELECT DISTINCT ArtistID, ArtistName FROM tracks WHERE CleanName = ?',
                        [clean_name])
                if not dbartist:
                    dbartist = myDB.select(
                        'SELECT DISTINCT ArtistID, ArtistName FROM alltracks WHERE CleanName = ?',
                        [clean_name])
                if not dbartist:
                    clean_artist = helpers.clean_name(artist_name)
                    if clean_artist:
                        # Range query: every CleanName from clean_artist up to
                        # (but excluding) clean_artist + '{'
                        dbartist = myDB.select(
                            'SELECT DISTINCT ArtistID, ArtistName FROM tracks WHERE CleanName >= ? and CleanName < ?',
                            [clean_artist, clean_artist + '{'])
                        if not dbartist:
                            dbartist = myDB.select(
                                'SELECT DISTINCT ArtistID, ArtistName FROM alltracks WHERE CleanName >= ? and CleanName < ?',
                                [clean_artist, clean_artist + '{'])

                if dbartist:
                    artistid = dbartist[0][0]

            if artistid:

                # This was previously using Artist, Album, Title with a SELECT LIKE ? and was not using an index
                # (Possible issue: https://stackoverflow.com/questions/37845854/python-sqlite3-not-using-index-with-like)
                # Now selects/updates using CleanName index (may have to revert if not working)

                # matching on CleanName should be enough, ensure it's the same artist just in case

                # Update tracks
                track = myDB.action(
                    'SELECT AlbumID, ArtistName FROM tracks WHERE CleanName = ? AND ArtistID = ?',
                    [clean_name, artistid]).fetchone()
                if track:
                    albumid = track['AlbumID']
                    myDB.action(
                        'UPDATE tracks SET Location = ?, BitRate = ?, Format = ? WHERE CleanName = ? AND ArtistID = ?',
                        [song['Location'], song['BitRate'], song['Format'], clean_name, artistid])

                # Update alltracks
                alltrack = myDB.action(
                    'SELECT AlbumID, ArtistName FROM alltracks WHERE CleanName = ? AND ArtistID = ?',
                    [clean_name, artistid]).fetchone()
                if alltrack:
                    albumid = alltrack['AlbumID']
                    myDB.action(
                        'UPDATE alltracks SET Location = ?, BitRate = ?, Format = ? WHERE CleanName = ? AND ArtistID = ?',
                        [song['Location'], song['BitRate'], song['Format'], clean_name, artistid])

        # Update have
        controlValueDict2 = {'Location': song['Location']}
        if albumid:
            newValueDict2 = {'Matched': albumid}
        else:
            newValueDict2 = {'Matched': "Failed"}
        myDB.upsert("have", newValueDict2, controlValueDict2)

    logger.info('Completed matching tracks from directory: %s' % dir.decode(
        headphones.SYS_ENCODING, 'replace'))

    if not append or artistScan:
        logger.info('Updating scanned artist track counts')

        # Clean up the new artist list
        unique_artists = {}.fromkeys(new_artists).keys()

        # Whether an artist is "new" is decided by querying the db for each
        # scanned artist below.
        new_artist_list = []

        for artist in unique_artists:

            if not artist:
                continue

            logger.info('Processing artist: %s' % artist)

            # check if artist is already in the db
            artist_lookup = "\"" + artist.replace("\"", "\"\"") + "\""

            try:
                dbartist = myDB.select(
                    'SELECT DISTINCT ArtistID, ArtistName FROM artists WHERE ArtistName LIKE ' + artist_lookup + '')
            except Exception:
                dbartist = None
            if not dbartist:
                clean_artist = helpers.clean_name(artist)
                if clean_artist:
                    dbartist = myDB.select(
                        'SELECT DISTINCT ArtistID, ArtistName FROM tracks WHERE CleanName >= ? and CleanName < ?',
                        [clean_artist, clean_artist + '{'])
                    if not dbartist:
                        dbartist = myDB.select(
                            'SELECT DISTINCT ArtistID, ArtistName FROM alltracks WHERE CleanName >= ? and CleanName < ?',
                            [clean_artist, clean_artist + '{'])

            # new artist not in db, add to list
            if not dbartist:
                new_artist_list.append(artist)
            else:
                # artist in db, update have track counts
                artistid = dbartist[0][0]

                # Have tracks are selected from tracks table and not all tracks because of duplicates
                # We update the track count upon an album switch to compliment this

                # Reset per artist so a failed query can neither raise
                # NameError nor reuse the previous artist's count.
                havetracks = None
                try:
                    havetracks = (
                        len(myDB.select(
                            'SELECT ArtistID From tracks WHERE ArtistID = ? AND Location IS NOT NULL',
                            [artistid])) +
                        len(myDB.select(
                            'SELECT ArtistName FROM have WHERE ArtistName LIKE ' + artist_lookup + ' AND Matched = "Failed"')))
                except Exception as e:
                    logger.warn('Error updating counts for artist: %s: %s' % (artist, e))

                # Note: some people complain about having "artist have tracks" > # of tracks total in artist official releases
                # (can fix by getting rid of second len statement)

                if havetracks:
                    myDB.action(
                        'UPDATE artists SET HaveTracks = ? WHERE ArtistID = ?',
                        [havetracks, artistid])

                    # Update albums to downloaded
                    update_album_status(ArtistID=artistid)

        logger.info('Found %i new artists' % len(new_artist_list))

        # Add scanned artists not in the db
        if new_artist_list:
            if headphones.CONFIG.AUTO_ADD_ARTISTS:
                logger.info('Importing %i new artists' % len(new_artist_list))
                importer.artistlist_to_mbids(new_artist_list)
            else:
                logger.info(
                    'To add these artists, go to Manage->Manage New Artists')
                for artist in new_artist_list:
                    myDB.action('INSERT OR IGNORE INTO newartists VALUES (?)', [artist])

        if headphones.CONFIG.DETECT_BITRATE and bitrates:
            headphones.CONFIG.PREFERRED_BITRATE = sum(bitrates) / len(
                bitrates) / 1000

    else:
        # If we're appending a new album to the database, update the artists total track counts
        logger.info('Updating artist track counts')

        artist_lookup = "\"" + ArtistName.replace("\"", "\"\"") + "\""
        # Default so a failed query skips the update instead of raising
        # NameError on the check below.
        havetracks = None
        try:
            havetracks = len(
                myDB.select(
                    'SELECT ArtistID FROM tracks WHERE ArtistID = ? AND Location IS NOT NULL',
                    [ArtistID])
            ) + len(
                myDB.select(
                    'SELECT ArtistName FROM have WHERE ArtistName LIKE ' + artist_lookup + ' AND Matched = "Failed"'))
        except Exception as e:
            logger.warn('Error updating counts for artist: %s: %s' % (ArtistName, e))

        if havetracks:
            myDB.action('UPDATE artists SET HaveTracks=? WHERE ArtistID=?',
                        [havetracks, ArtistID])

    # update_album_status() is called per artist above rather than once here

    if not append and not artistScan:
        lastfm.getSimilar()

    if ArtistName:
        logger.info('Scanning complete for artist: %s', ArtistName)
    else:
        logger.info('Library scan complete')
def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None):
    """Scan a music directory and link its files to known tracks.

    dir        -- directory to scan; defaults to headphones.MUSIC_DIR.
    append     -- when false, `dir` is encoded with SYS_ENCODING, stale track
                  locations are cleared, the `have` table is emptied and all
                  artists' HaveTracks counts are refreshed. When true, `dir`
                  is used as given and only ArtistID's count is refreshed.
    ArtistID   -- artist whose HaveTracks count is updated in append mode.
    ArtistName -- name used to count that artist's `have` rows in append mode.

    Returns None. Files whose metadata cannot be read are logged and skipped.
    Each song is matched from most specific (TrackID + ReleaseID) to least
    specific (TrackID only); songs with no match are recorded in `have`.
    """
    if not dir:
        dir = headphones.MUSIC_DIR

    # If we're appending a dir, it's coming from the post processor which is
    # already bytestring
    if not append:
        dir = dir.encode(headphones.SYS_ENCODING)

    if not os.path.isdir(dir):
        logger.warn('Cannot find directory: %s. Not scanning' % dir.decode(headphones.SYS_ENCODING))
        return

    myDB = db.DBConnection()

    if not append:
        # Clean up bad filepaths
        tracks = myDB.select('SELECT Location, TrackID from tracks WHERE Location IS NOT NULL')

        for track in tracks:
            if not os.path.isfile(track['Location'].encode(headphones.SYS_ENCODING)):
                myDB.action('UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE TrackID=?',
                            [None, None, None, track['TrackID']])

        myDB.action('DELETE from have')

    logger.info('Scanning music directory: %s' % dir)

    new_artists = []
    bitrates = []

    song_list = []

    for r, d, f in os.walk(dir):
        for files in f:
            # MEDIA_FORMATS = music file extensions, e.g. mp3, flac, etc
            if any(files.lower().endswith('.' + x.lower()) for x in headphones.MEDIA_FORMATS):

                song = os.path.join(r, files)

                # We need the unicode path to use for logging, inserting into database
                unicode_song_path = song.decode(headphones.SYS_ENCODING, 'replace')

                # Try to read the metadata. Any failure skips the file, but
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                try:
                    media = MediaFile(song)
                except Exception:
                    logger.error('Cannot read file: ' + unicode_song_path)
                    continue

                # Grab the bitrates for the auto detect bit rate option
                if media.bitrate:
                    bitrates.append(media.bitrate)

                # Use the album artist over the artist if available
                if media.albumartist:
                    f_artist = media.albumartist
                elif media.artist:
                    f_artist = media.artist
                else:
                    f_artist = None

                # Add the song to our song list -
                # TODO: skip adding songs without the minimum requisite information (just a matter of putting together the right if statements)

                song_dict = {
                    'TrackID': media.mb_trackid,
                    'ReleaseID': media.mb_albumid,
                    'ArtistName': f_artist,
                    'AlbumTitle': media.album,
                    'TrackNumber': media.track,
                    'TrackLength': media.length,
                    'Genre': media.genre,
                    'Date': media.date,
                    'TrackTitle': media.title,
                    'BitRate': media.bitrate,
                    'Format': media.format,
                    'Location': unicode_song_path
                }

                song_list.append(song_dict)

    # Now we start track matching
    total_number_of_songs = len(song_list)
    logger.info("Found " + str(total_number_of_songs) + " tracks in: '" + dir +
                "'. Matching tracks to the appropriate releases....")

    # Sort the song_list by most vague (e.g. no trackid or releaseid) to most specific (both trackid & releaseid)
    # When we insert into the database, the tracks with the most specific information will overwrite the more general matches

    song_list = helpers.multikeysort(song_list, ['ReleaseID', 'TrackID'])

    # We'll use this to give a % completion, just because the track matching might take a while
    song_count = 0
    last_completion_percentage = 0

    for song in song_list:

        song_count += 1
        # Whole-number percentage reported each time it advances by at least
        # 10 points. Testing a float for an exact multiple of 10 would miss
        # most milestones.
        completion_percentage = song_count * 100 // total_number_of_songs

        if completion_percentage >= last_completion_percentage + 10:
            logger.info("Track matching is " + str(completion_percentage) + "% complete")
            last_completion_percentage = completion_percentage

        # If the track has a trackid & releaseid (beets: albumid) that the most surefire way
        # of identifying a track to a specific release so we'll use that first
        if song['TrackID'] and song['ReleaseID']:

            # Check both the tracks table & alltracks table in case they haven't populated the alltracks table yet
            track = myDB.action(
                'SELECT TrackID, ReleaseID, AlbumID from alltracks WHERE TrackID=? AND ReleaseID=?',
                [song['TrackID'], song['ReleaseID']]).fetchone()

            # It might be the case that the alltracks table isn't populated yet, so maybe we can only find a match in the tracks table
            if not track:
                track = myDB.action(
                    'SELECT TrackID, ReleaseID, AlbumID from tracks WHERE TrackID=? AND ReleaseID=?',
                    [song['TrackID'], song['ReleaseID']]).fetchone()

            if track:
                # Use TrackID & ReleaseID here since there can only be one possible match with a TrackID & ReleaseID query combo
                controlValueDict = {'TrackID': track['TrackID'],
                                    'ReleaseID': track['ReleaseID']}

                # Insert it into the Headphones hybrid release (ReleaseID == AlbumID)
                hybridControlValueDict = {'TrackID': track['TrackID'],
                                          'ReleaseID': track['AlbumID']}

                newValueDict = {'Location': song['Location'],
                                'BitRate': song['BitRate'],
                                'Format': song['Format']}

                # Update both the tracks table and the alltracks table using the controlValueDict and hybridControlValueDict
                myDB.upsert("alltracks", newValueDict, controlValueDict)
                myDB.upsert("tracks", newValueDict, controlValueDict)

                myDB.upsert("alltracks", newValueDict, hybridControlValueDict)
                myDB.upsert("tracks", newValueDict, hybridControlValueDict)

                # Matched. Move on to the next one:
                continue

        # If we can't find it with TrackID & ReleaseID, next most specific will be
        # releaseid + tracktitle, although perhaps less reliable due to a higher
        # likelihood of variations in the song title (e.g. feat. artists)
        if song['ReleaseID'] and song['TrackTitle']:

            track = myDB.action(
                'SELECT TrackID, ReleaseID, AlbumID from alltracks WHERE ReleaseID=? AND TrackTitle=?',
                [song['ReleaseID'], song['TrackTitle']]).fetchone()

            if not track:
                track = myDB.action(
                    'SELECT TrackID, ReleaseID, AlbumID from tracks WHERE ReleaseID=? AND TrackTitle=?',
                    [song['ReleaseID'], song['TrackTitle']]).fetchone()

            if track:
                # There can also only be one match for this query as well (although it might be on both the tracks and alltracks table)
                # So use both TrackID & ReleaseID as the control values
                controlValueDict = {'TrackID': track['TrackID'],
                                    'ReleaseID': track['ReleaseID']}

                hybridControlValueDict = {'TrackID': track['TrackID'],
                                          'ReleaseID': track['AlbumID']}

                newValueDict = {'Location': song['Location'],
                                'BitRate': song['BitRate'],
                                'Format': song['Format']}

                # Update both tables here as well
                myDB.upsert("alltracks", newValueDict, controlValueDict)
                myDB.upsert("tracks", newValueDict, controlValueDict)

                myDB.upsert("alltracks", newValueDict, hybridControlValueDict)
                myDB.upsert("tracks", newValueDict, hybridControlValueDict)

                # Done
                continue

        # Next most specific will be the opposite: a TrackID and an AlbumTitle
        # TrackIDs span multiple releases so if something is on an official album
        # and a compilation, for example, this will match it to the right one
        # However - there may be multiple matches here
        if song['TrackID'] and song['AlbumTitle']:

            # Even though there might be multiple matches, we just need to grab one to confirm a match
            track = myDB.action(
                'SELECT TrackID, AlbumTitle from alltracks WHERE TrackID=? AND AlbumTitle LIKE ?',
                [song['TrackID'], song['AlbumTitle']]).fetchone()

            if not track:
                track = myDB.action(
                    'SELECT TrackID, AlbumTitle from tracks WHERE TrackID=? AND AlbumTitle LIKE ?',
                    [song['TrackID'], song['AlbumTitle']]).fetchone()

            if track:
                # Don't need the hybridControlValueDict here since ReleaseID is not unique
                controlValueDict = {'TrackID': track['TrackID'],
                                    'AlbumTitle': track['AlbumTitle']}

                newValueDict = {'Location': song['Location'],
                                'BitRate': song['BitRate'],
                                'Format': song['Format']}

                myDB.upsert("alltracks", newValueDict, controlValueDict)
                myDB.upsert("tracks", newValueDict, controlValueDict)

                continue

        # Next most specific is the ArtistName + AlbumTitle + TrackTitle combo (but probably
        # even more unreliable than the previous queries, and might span multiple releases)
        if song['ArtistName'] and song['AlbumTitle'] and song['TrackTitle']:

            track = myDB.action(
                'SELECT ArtistName, AlbumTitle, TrackTitle from alltracks WHERE ArtistName LIKE ? AND AlbumTitle LIKE ? AND TrackTitle LIKE ?',
                [song['ArtistName'], song['AlbumTitle'], song['TrackTitle']]).fetchone()

            if not track:
                track = myDB.action(
                    'SELECT ArtistName, AlbumTitle, TrackTitle from tracks WHERE ArtistName LIKE ? AND AlbumTitle LIKE ? AND TrackTitle LIKE ?',
                    [song['ArtistName'], song['AlbumTitle'], song['TrackTitle']]).fetchone()

            if track:
                controlValueDict = {'ArtistName': track['ArtistName'],
                                    'AlbumTitle': track['AlbumTitle'],
                                    'TrackTitle': track['TrackTitle']}

                newValueDict = {'Location': song['Location'],
                                'BitRate': song['BitRate'],
                                'Format': song['Format']}

                myDB.upsert("alltracks", newValueDict, controlValueDict)
                myDB.upsert("tracks", newValueDict, controlValueDict)

                continue

        # Use the "CleanName" (ArtistName + AlbumTitle + TrackTitle stripped of punctuation, capitalization, etc)
        # This is more reliable than the former but requires some string manipulation so we'll do it only
        # if we can't find a match with the original data
        if song['ArtistName'] and song['AlbumTitle'] and song['TrackTitle']:

            CleanName = helpers.cleanName(
                song['ArtistName'] + ' ' + song['AlbumTitle'] + ' ' + song['TrackTitle'])

            track = myDB.action('SELECT CleanName from alltracks WHERE CleanName LIKE ?',
                                [CleanName]).fetchone()

            if not track:
                track = myDB.action('SELECT CleanName from tracks WHERE CleanName LIKE ?',
                                    [CleanName]).fetchone()

            if track:
                controlValueDict = {'CleanName': track['CleanName']}

                newValueDict = {'Location': song['Location'],
                                'BitRate': song['BitRate'],
                                'Format': song['Format']}

                myDB.upsert("alltracks", newValueDict, controlValueDict)
                myDB.upsert("tracks", newValueDict, controlValueDict)

                continue

        # Match on TrackID alone if we can't find it using any of the above methods. This method is reliable
        # but spans multiple releases - but that's why we're putting at the beginning as a last resort. If a track
        # with more specific information exists in the library, it'll overwrite these values
        if song['TrackID']:

            track = myDB.action('SELECT TrackID from alltracks WHERE TrackID=?',
                                [song['TrackID']]).fetchone()

            if not track:
                track = myDB.action('SELECT TrackID from tracks WHERE TrackID=?',
                                    [song['TrackID']]).fetchone()

            if track:
                controlValueDict = {'TrackID': track['TrackID']}

                newValueDict = {'Location': song['Location'],
                                'BitRate': song['BitRate'],
                                'Format': song['Format']}

                myDB.upsert("alltracks", newValueDict, controlValueDict)
                myDB.upsert("tracks", newValueDict, controlValueDict)

                continue

        # if we can't find a match in the database on a track level, it might be a new artist or it might be on a non-mb release
        if song['ArtistName']:
            new_artists.append(song['ArtistName'])
        else:
            continue

        # The have table will become the new database for unmatched tracks (i.e. tracks with no associated links in the database
        if song['ArtistName'] and song['AlbumTitle'] and song['TrackTitle']:
            CleanName = helpers.cleanName(
                song['ArtistName'] + ' ' + song['AlbumTitle'] + ' ' + song['TrackTitle'])
        else:
            continue

        myDB.action(
            'INSERT INTO have (ArtistName, AlbumTitle, TrackNumber, TrackTitle, TrackLength, BitRate, Genre, Date, TrackID, Location, CleanName, Format) VALUES( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
            [song['ArtistName'], song['AlbumTitle'], song['TrackNumber'], song['TrackTitle'],
             song['TrackLength'], song['BitRate'], song['Genre'], song['Date'], song['TrackID'],
             song['Location'], CleanName, song['Format']])

    logger.info('Completed matching tracks from directory: %s' % dir)

    if not append:
        # Clean up the new artist list
        unique_artists = {}.fromkeys(new_artists).keys()
        current_artists = myDB.select('SELECT ArtistName, ArtistID from artists')

        # Lower-cased names of artists already in the db, built once instead
        # of once per scanned artist; NULL names are skipped.
        known_artists = set(x[0].lower() for x in current_artists if x[0])
        artist_list = [f for f in unique_artists if f.lower() not in known_artists]

        # Update track counts
        logger.info('Updating current artist track counts')

        for artist in current_artists:
            # Have tracks are selected from tracks table and not all tracks because of duplicates
            # We update the track count upon an album switch to compliment this
            havetracks = len(myDB.select(
                'SELECT TrackTitle from tracks WHERE ArtistID=? AND Location IS NOT NULL',
                [artist['ArtistID']])) + len(myDB.select(
                    'SELECT TrackTitle from have WHERE ArtistName like ?',
                    [artist['ArtistName']]))
            myDB.action('UPDATE artists SET HaveTracks=? WHERE ArtistID=?',
                        [havetracks, artist['ArtistID']])

        logger.info('Found %i new artists' % len(artist_list))

        if len(artist_list):
            if headphones.ADD_ARTISTS:
                logger.info('Importing %i new artists' % len(artist_list))
                importer.artistlist_to_mbids(artist_list)
            else:
                logger.info('To add these artists, go to Manage->Manage New Artists')
                myDB.action('DELETE from newartists')
                for artist in artist_list:
                    myDB.action('INSERT into newartists VALUES (?)', [artist])

        # Only average when at least one bitrate was read; an empty scan
        # would otherwise raise ZeroDivisionError.
        if headphones.DETECT_BITRATE and bitrates:
            headphones.PREFERRED_BITRATE = sum(bitrates)/len(bitrates)/1000

    else:
        # If we're appending a new album to the database, update the artists total track counts
        logger.info('Updating artist track counts')

        havetracks = len(myDB.select(
            'SELECT TrackTitle from tracks WHERE ArtistID=? AND Location IS NOT NULL',
            [ArtistID])) + len(myDB.select(
                'SELECT TrackTitle from have WHERE ArtistName like ?', [ArtistName]))
        myDB.action('UPDATE artists SET HaveTracks=? WHERE ArtistID=?', [havetracks, ArtistID])
def libraryScan(dir=None):
    """Synchronise the database with the music files found under *dir*.

    The scan runs in four phases:

    1. Clear Location/BitRate/Format on ``tracks`` rows whose file no longer
       exists on disk.
    2. Empty the ``have`` table, walk *dir*, read every media file's tags and
       either attach the file to a matching ``tracks`` row or record it in
       ``have`` as an unmatched track.
    3. For ``tracks`` rows that still have no Location, build the expected
       path from FOLDER_FORMAT / FILE_FORMAT and glob for it; on a hit the
       track id is written back into the file's tags.
    4. Refresh every artist's HaveTracks count and import (or stash in
       ``headphones.NEW_ARTISTS``) artists that are not in the database yet.

    Args:
        dir: Directory to scan. Falls back to ``headphones.MUSIC_DIR`` when
            empty. (The name shadows the builtin but is part of the public
            signature, so it is kept.)

    Returns:
        None. Returns early (after logging a warning) when *dir* is not a
        directory.
    """
    if not dir:
        dir = headphones.MUSIC_DIR

    try:
        dir = str(dir)
    except UnicodeEncodeError:
        dir = unicode(dir).encode("unicode_escape")

    if not os.path.isdir(dir):
        logger.warn("Cannot find directory: %s. Not scanning" % dir)
        return

    myDB = db.DBConnection()

    # Phase 1: clean up bad filepaths.
    tracks = myDB.select("SELECT Location, TrackID from tracks WHERE Location IS NOT NULL")
    for track in tracks:
        # 'replace' keeps one unencodable path from aborting the whole scan;
        # such a path simply fails the isfile() test and is cleared.
        encoded_location = track["Location"].encode(headphones.SYS_ENCODING, "replace")
        if not os.path.isfile(encoded_location):
            myDB.action(
                "UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE TrackID=?",
                [None, None, None, track["TrackID"]],
            )

    logger.info("Scanning music directory: %s" % dir)

    new_artists = []
    bitrates = []

    myDB.action("DELETE from have")

    # MEDIA_FORMATS = music file extensions, e.g. mp3, flac, etc.
    # Built once; str.endswith accepts a tuple of suffixes.
    media_suffixes = tuple("." + ext.lower() for ext in headphones.MEDIA_FORMATS)

    # Phase 2: walk the directory and match every media file.
    for root, _subdirs, filenames in os.walk(dir):
        for filename in filenames:
            if not filename.lower().endswith(media_suffixes):
                continue

            song = os.path.join(root, filename)
            # Unicode form of the path, used for logging and for the database.
            unicode_path = unicode(song, headphones.SYS_ENCODING, errors="replace")

            # Try to read the metadata
            try:
                media = MediaFile(song)
            except Exception:
                logger.error("Cannot read file: " + unicode_path)
                continue

            # Grab the bitrates for the auto detect bit rate option
            if media.bitrate:
                bitrates.append(media.bitrate)

            # Prefer the album artist; files with no artist at all are skipped.
            f_artist = media.albumartist or media.artist
            if not f_artist:
                continue

            # Try to find a match based on artist/album/tracktitle
            if media.album and media.title:
                track = myDB.action(
                    "SELECT TrackID from tracks WHERE CleanName LIKE ?",
                    [helpers.cleanName(f_artist + " " + media.album + " " + media.title)],
                ).fetchone()

                if not track:
                    track = myDB.action(
                        "SELECT TrackID from tracks WHERE ArtistName LIKE ? AND AlbumTitle LIKE ? AND TrackTitle LIKE ?",
                        [f_artist, media.album, media.title],
                    ).fetchone()

                if track:
                    myDB.action(
                        "UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE TrackID=?",
                        [unicode_path, media.bitrate, media.format, track["TrackID"]],
                    )
                    continue

            # Try to match on mbid if available and we couldn't find a match
            # based on metadata
            if media.mb_trackid:
                track = myDB.action(
                    "SELECT TrackID from tracks WHERE TrackID=?", [media.mb_trackid]
                ).fetchone()

                if track:
                    myDB.action(
                        "UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE TrackID=?",
                        [unicode_path, media.bitrate, media.format, track["TrackID"]],
                    )
                    continue

            # If we can't find a match in the database on a track level, it
            # might be a new artist or it might be on a non-mb release.
            new_artists.append(f_artist)

            # Album/title may be missing here (the metadata match above is
            # skipped in that case), so fall back to empty strings instead of
            # failing on None during concatenation.
            clean_name = helpers.cleanName(
                f_artist + " " + (media.album or "") + " " + (media.title or "")
            )

            # The have table holds the unmatched tracks (i.e. tracks with no
            # associated links in the database).
            myDB.action(
                "INSERT INTO have (ArtistName, AlbumTitle, TrackNumber, TrackTitle, TrackLength, BitRate, Genre, Date, TrackID, Location, CleanName, Format) VALUES( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                [
                    f_artist,
                    media.album,
                    media.track,
                    media.title,
                    media.length,
                    media.bitrate,
                    media.genre,
                    media.date,
                    media.mb_trackid,
                    unicode_path,
                    clean_name,
                    media.format,
                ],
            )

    logger.info("Completed scanning of directory: %s" % dir)
    logger.info("Checking filepaths to see if we can find any matches")

    # Phase 3: check empty file paths to see if we can find a match based on
    # their folder format.
    tracks = myDB.select("SELECT * from tracks WHERE Location IS NULL")
    for track in tracks:
        release = myDB.action("SELECT * from albums WHERE AlbumID=?", [track["AlbumID"]]).fetchone()
        if not release:
            # No album row for this track: no way to build the expected path.
            continue

        try:
            year = release["ReleaseDate"][:4]
        except TypeError:
            # ReleaseDate is NULL
            year = ""

        artist = release["ArtistName"].replace("/", "_")
        album = release["AlbumTitle"].replace("/", "_")
        releasetype = release["Type"].replace("/", "_")

        # "$First" is the first character of the artist name, ignoring a
        # leading "The "; all-digit names are bucketed under "0-9".
        if release["ArtistName"].startswith("The "):
            sortname = release["ArtistName"][4:]
        else:
            sortname = release["ArtistName"]

        firstchar = "0-9" if sortname.isdigit() else sortname[0]

        albumvalues = {
            "$Artist": artist,
            "$Album": album,
            "$Year": year,
            "$Type": releasetype,
            "$First": firstchar,
            "$artist": artist.lower(),
            "$album": album.lower(),
            "$year": year,
            "$type": releasetype.lower(),
            "$first": firstchar.lower(),
        }

        folder = helpers.replace_all(headphones.FOLDER_FORMAT, albumvalues)
        folder = folder.replace("./", "_/").replace(":", "_").replace("?", "_")

        if folder.endswith("."):
            # NOTE(review): this replaces EVERY "." in the folder name, not
            # only the trailing one. Left untouched on purpose: the result
            # presumably has to equal the folder names produced when the
            # files were originally moved - confirm before changing.
            folder = folder.replace(folder[len(folder) - 1], "_")

        if not track["TrackNumber"]:
            tracknumber = ""
        else:
            tracknumber = "%02d" % track["TrackNumber"]

        title = track["TrackTitle"]

        trackvalues = {
            "$Track": tracknumber,
            "$Title": title,
            "$Artist": release["ArtistName"],
            "$Album": release["AlbumTitle"],
            "$Year": year,
            "$track": tracknumber,
            "$title": title.lower(),
            "$artist": release["ArtistName"].lower(),
            "$album": release["AlbumTitle"].lower(),
            "$year": year,
        }

        # ".*" makes the glob match any file extension.
        new_file_name = helpers.replace_all(headphones.FILE_FORMAT, trackvalues).replace("/", "_") + ".*"
        new_file_name = new_file_name.replace("?", "_").replace(":", "_")

        full_path_to_file = os.path.normpath(
            os.path.join(headphones.MUSIC_DIR, folder, new_file_name)
        ).encode(headphones.SYS_ENCODING, "replace")

        match = glob.glob(full_path_to_file)
        if not match:
            continue

        logger.info("Found a match: %s. Writing MBID to metadata" % match[0])

        unipath = unicode(match[0], headphones.SYS_ENCODING, errors="replace")

        myDB.action("UPDATE tracks SET Location=? WHERE TrackID=?", [unipath, track["TrackID"]])
        myDB.action("DELETE from have WHERE Location=?", [unipath])

        # Try to insert the appropriate track id so we don't have to keep
        # doing this. Best effort: a failure is logged and the scan goes on.
        try:
            media = MediaFile(match[0])
            media.mb_trackid = track["TrackID"]
            media.save()
            myDB.action(
                "UPDATE tracks SET BitRate=?, Format=? WHERE TrackID=?",
                [media.bitrate, media.format, track["TrackID"]],
            )
            logger.debug("Wrote mbid to track: %s" % match[0])
        except Exception:
            logger.error("Error embedding track id into: %s" % match[0])

    logger.info("Done checking empty filepaths")
    logger.info("Done syncing library with directory: %s" % dir)

    # Phase 4: clean up the new artist list (de-duplicate, drop known artists).
    unique_artists = list(dict.fromkeys(new_artists))
    current_artists = myDB.select("SELECT ArtistName, ArtistID from artists")

    # Lower-cased known names are built once instead of once per scanned
    # artist; only needed when there is something to compare.
    if unique_artists:
        known_names = set(row[0].lower() for row in current_artists)
    else:
        known_names = set()
    artist_list = [name for name in unique_artists if name.lower() not in known_names]

    # Update track counts
    logger.info("Updating track counts")

    for artist in current_artists:
        matched_tracks = myDB.select(
            "SELECT TrackTitle from tracks WHERE ArtistID like ? AND Location IS NOT NULL",
            [artist["ArtistID"]],
        )
        unmatched_tracks = myDB.select(
            "SELECT TrackTitle from have WHERE ArtistName like ?", [artist["ArtistName"]]
        )
        havetracks = len(matched_tracks) + len(unmatched_tracks)
        myDB.action("UPDATE artists SET HaveTracks=? WHERE ArtistID=?", [havetracks, artist["ArtistID"]])

    logger.info("Found %i new artists" % len(artist_list))

    if artist_list:
        if headphones.ADD_ARTISTS:
            logger.info("Importing %i new artists" % len(artist_list))
            importer.artistlist_to_mbids(artist_list)
        else:
            logger.info("To add these artists, go to Manage->Manage New Artists")
            headphones.NEW_ARTISTS = artist_list

    # Guard against an empty list: a scan that found no readable bitrates
    # would otherwise raise ZeroDivisionError here.
    if headphones.DETECT_BITRATE and bitrates:
        headphones.PREFERRED_BITRATE = sum(bitrates) / len(bitrates) / 1000
def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None, cron=False):
    """Scan *dir* for media files and match them against the database.

    Phases:

    1. (full scans only) Clear Location/BitRate/Format on ``tracks`` and
       ``alltracks`` rows whose file is gone, and delete ``have`` rows whose
       file is gone.
    2. Walk *dir*, read the tags of every media file and upsert them into
       ``have`` (keyed by Location). New files and files whose
       artist/album/title changed are flagged for matching (Matched NULL).
    3. For every unmatched ``have`` row below *dir*, look for a matching row
       in ``tracks`` and ``alltracks`` (artist/album/title first, CleanName
       as fallback) and record the file location there.
    4. Refresh HaveTracks counts; on full scans also import or queue artists
       that are not in the database yet.

    Args:
        dir: Directory to scan; defaults to ``headphones.MUSIC_DIR``. Per the
            original comment, a directory passed with ``append=True`` comes
            from the post processor and is already a byte string.
        append: True when a single new album directory is being added rather
            than the whole library being rescanned.
        ArtistID: Artist whose HaveTracks count is refreshed when *append*.
        ArtistName: Name of that artist, used to count ``have`` rows.
        cron: True when called by the scheduler; the scan is then skipped
            unless ``headphones.LIBRARYSCAN`` is enabled.

    Returns:
        None.
    """
    if cron and not headphones.LIBRARYSCAN:
        return

    if not dir:
        if not headphones.MUSIC_DIR:
            return
        dir = headphones.MUSIC_DIR

    # If we're appending a dir, it's coming from the post processor which is
    # already bytestring
    if not append:
        dir = dir.encode(headphones.SYS_ENCODING)

    # Unicode form of the directory for log messages and SQL parameters.
    display_dir = dir.decode(headphones.SYS_ENCODING, 'replace')

    if not os.path.isdir(dir):
        logger.warn('Cannot find directory: %s. Not scanning' % display_dir)
        return

    myDB = db.DBConnection()

    if not append:
        # Clean up bad filepaths
        tracks = myDB.select('SELECT Location, TrackID from alltracks WHERE Location IS NOT NULL')
        for track in tracks:
            # 'replace' keeps one unencodable path from aborting the scan.
            encoded_track_string = track['Location'].encode(headphones.SYS_ENCODING, 'replace')
            if not os.path.isfile(encoded_track_string):
                myDB.action('UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE Location=?',
                            [None, None, None, track['Location']])
                myDB.action('UPDATE alltracks SET Location=?, BitRate=?, Format=? WHERE Location=?',
                            [None, None, None, track['Location']])

        del_have_tracks = myDB.select('SELECT Location, Matched from have')
        for track in del_have_tracks:
            encoded_track_string = track['Location'].encode(headphones.SYS_ENCODING, 'replace')
            if not os.path.isfile(encoded_track_string):
                myDB.action('DELETE FROM have WHERE Location=?', [track['Location']])
                # Reset every have row that shared the deleted row's Matched
                # value so those files go through matching again.
                myDB.action('UPDATE have SET Matched=NULL WHERE Matched=?', [track['Matched']])
                logger.info('File %s removed from Headphones, as it is no longer on disk'
                            % encoded_track_string.decode(headphones.SYS_ENCODING, 'replace'))

    logger.info('Scanning music directory: %s' % display_dir)

    new_artists = []
    bitrates = []
    new_song_count = 0

    # MEDIA_FORMATS = music file extensions, e.g. mp3, flac, etc
    media_suffixes = tuple('.' + ext.lower() for ext in headphones.MEDIA_FORMATS)

    for root, subdirs, filenames in os.walk(dir):
        # Prune hidden directories. The list must be modified in place
        # (slice assignment) because os.walk keeps a reference to it.
        subdirs[:] = [name for name in subdirs if not name.startswith(".")]

        for filename in filenames:
            if not filename.lower().endswith(media_suffixes):
                continue

            song = os.path.join(root, filename)

            # We need the unicode path to use for logging, inserting into database
            unicode_song_path = song.decode(headphones.SYS_ENCODING, 'replace')

            # Try to read the metadata
            try:
                media = MediaFile(song)
            except Exception:
                logger.error('Cannot read file: ' + unicode_song_path)
                continue

            # Grab the bitrates for the auto detect bit rate option
            if media.bitrate:
                bitrates.append(media.bitrate)

            # Use the album artist over the artist if available
            f_artist = media.albumartist or media.artist or None

            # CleanName is only meaningful with artist, album and title.
            if f_artist and media.album and media.title:
                CleanName = helpers.cleanName(f_artist + ' ' + media.album + ' ' + media.title)
            else:
                CleanName = None

            controlValueDict = {'Location': unicode_song_path}
            newValueDict = {
                'TrackID': media.mb_trackid,
                'ArtistName': f_artist,
                'AlbumTitle': media.album,
                'TrackNumber': media.track,
                'TrackLength': media.length,
                'Genre': media.genre,
                'Date': media.date,
                'TrackTitle': media.title,
                'BitRate': media.bitrate,
                'Format': media.format,
                'CleanName': CleanName,
            }

            check_exist_song = myDB.action("SELECT * FROM have WHERE Location=?",
                                           [unicode_song_path]).fetchone()

            # Only attempt to match songs that are new, haven't yet been
            # matched, or whose metadata has changed.
            if not check_exist_song:
                myDB.upsert("have", newValueDict, controlValueDict)
                new_song_count += 1
            elif (check_exist_song['ArtistName'] != f_artist
                    or check_exist_song['AlbumTitle'] != media.album
                    or check_exist_song['TrackTitle'] != media.title):
                newValueDict['Matched'] = None
                myDB.upsert("have", newValueDict, controlValueDict)
                new_song_count += 1

    # Now we start track matching
    logger.info("%s new/modified songs found and added to the database" % new_song_count)

    # Locations are stored as unicode, so the LIKE prefix must be the decoded
    # directory rather than the byte string.
    location_prefix = display_dir + "%"
    song_list = myDB.action("SELECT * FROM have WHERE Matched IS NULL AND LOCATION LIKE ?",
                            [location_prefix])
    total_number_of_songs = myDB.action(
        "SELECT COUNT(*) FROM have WHERE Matched IS NULL AND LOCATION LIKE ?",
        [location_prefix]).fetchone()[0]
    logger.info("Found " + str(total_number_of_songs) + " unmatched tracks in: '" + display_dir
                + "'. Matching tracks to the appropriate releases....")

    # Sort by artist, then album, so the "Now matching songs by ..." log line
    # below is emitted once per run of songs by the same artist.
    song_list = helpers.multikeysort(song_list, ['ArtistName', 'AlbumTitle'])

    # We'll use this to give a % completion, just because the track matching
    # might take a while
    previous_artist = None
    last_completion_percentage = 0

    for song_index, song in enumerate(song_list):
        if song_index == 0 or song['ArtistName'] != previous_artist:
            logger.info("Now matching songs by %s" % song['ArtistName'])
        previous_artist = song['ArtistName']

        # Report roughly every 10%. Testing "percentage % 10 == 0" on a float
        # almost never fires, so compare against the last reported value.
        if total_number_of_songs:
            completion_percentage = int(
                float(song_index + 1) / total_number_of_songs * 1000) / 10.0
            if completion_percentage >= last_completion_percentage + 10:
                logger.info("Track matching is " + str(completion_percentage) + "% complete")
                last_completion_percentage = completion_percentage

        # Matching is deliberately limited to [artist/album/title] with a
        # CleanName fallback. Per the original author's note, the more
        # specific clauses (artist id / release id) were never hit during a
        # library scan, and the other matching functions (importer.py, mb.py)
        # do the same, so this keeps them consistent.
        if song['ArtistName'] and song['AlbumTitle'] and song['TrackTitle']:
            track = myDB.action(
                'SELECT ArtistName, AlbumTitle, TrackTitle, AlbumID from tracks WHERE ArtistName LIKE ? AND AlbumTitle LIKE ? AND TrackTitle LIKE ?',
                [song['ArtistName'], song['AlbumTitle'], song['TrackTitle']]).fetchone()
            if track:
                controlValueDict = {'ArtistName': track['ArtistName'],
                                    'AlbumTitle': track['AlbumTitle'],
                                    'TrackTitle': track['TrackTitle']}
                newValueDict = {'Location': song['Location'],
                                'BitRate': song['BitRate'],
                                'Format': song['Format']}
                myDB.upsert("tracks", newValueDict, controlValueDict)

                controlValueDict2 = {'ArtistName': song['ArtistName'],
                                     'AlbumTitle': song['AlbumTitle'],
                                     'TrackTitle': song['TrackTitle']}
                newValueDict2 = {'Matched': track['AlbumID']}
                myDB.upsert("have", newValueDict2, controlValueDict2)
            else:
                track = myDB.action(
                    'SELECT CleanName, AlbumID from tracks WHERE CleanName LIKE ?',
                    [song['CleanName']]).fetchone()
                if track:
                    controlValueDict = {'CleanName': track['CleanName']}
                    newValueDict = {'Location': song['Location'],
                                    'BitRate': song['BitRate'],
                                    'Format': song['Format']}
                    myDB.upsert("tracks", newValueDict, controlValueDict)

                    controlValueDict2 = {'CleanName': song['CleanName']}
                    newValueDict2 = {'Matched': track['AlbumID']}
                    myDB.upsert("have", newValueDict2, controlValueDict2)

            # Same two-step lookup against alltracks.
            alltrack = myDB.action(
                'SELECT ArtistName, AlbumTitle, TrackTitle, AlbumID from alltracks WHERE ArtistName LIKE ? AND AlbumTitle LIKE ? AND TrackTitle LIKE ?',
                [song['ArtistName'], song['AlbumTitle'], song['TrackTitle']]).fetchone()
            if alltrack:
                controlValueDict = {'ArtistName': alltrack['ArtistName'],
                                    'AlbumTitle': alltrack['AlbumTitle'],
                                    'TrackTitle': alltrack['TrackTitle']}
                newValueDict = {'Location': song['Location'],
                                'BitRate': song['BitRate'],
                                'Format': song['Format']}
                myDB.upsert("alltracks", newValueDict, controlValueDict)

                controlValueDict2 = {'ArtistName': song['ArtistName'],
                                     'AlbumTitle': song['AlbumTitle'],
                                     'TrackTitle': song['TrackTitle']}
                newValueDict2 = {'Matched': alltrack['AlbumID']}
                myDB.upsert("have", newValueDict2, controlValueDict2)
            else:
                alltrack = myDB.action(
                    'SELECT CleanName, AlbumID from alltracks WHERE CleanName LIKE ?',
                    [song['CleanName']]).fetchone()
                if alltrack:
                    controlValueDict = {'CleanName': alltrack['CleanName']}
                    newValueDict = {'Location': song['Location'],
                                    'BitRate': song['BitRate'],
                                    'Format': song['Format']}
                    myDB.upsert("alltracks", newValueDict, controlValueDict)

                    controlValueDict2 = {'CleanName': song['CleanName']}
                    newValueDict2 = {'Matched': alltrack['AlbumID']}
                    myDB.upsert("have", newValueDict2, controlValueDict2)

        # Every scanned artist is a candidate for the new-artist list; names
        # already in the database are filtered out further down.
        if song['ArtistName']:
            new_artists.append(song['ArtistName'])

    logger.info('Completed matching tracks from directory: %s' % display_dir)

    if not append:
        # Clean up the new artist list
        unique_artists = list(dict.fromkeys(new_artists))
        current_artists = myDB.select('SELECT ArtistName, ArtistID from artists')

        # Names are compared through cleanName because artists with special
        # characters (-,') used to show up as new. The normalised set is built
        # once, and only when there is something to compare against it.
        if unique_artists:
            known_names = set(helpers.cleanName(row[0]).lower() for row in current_artists)
        else:
            known_names = set()
        artist_list = [name for name in unique_artists
                       if helpers.cleanName(name).lower() not in known_names]

        # Update track counts
        logger.info('Updating current artist track counts')

        for artist in current_artists:
            # Have tracks are selected from tracks table and not all tracks
            # because of duplicates. We update the track count upon an album
            # switch to complement this.
            matched_tracks = myDB.select(
                'SELECT TrackTitle from tracks WHERE ArtistID=? AND Location IS NOT NULL',
                [artist['ArtistID']])
            unmatched_tracks = myDB.select(
                'SELECT TrackTitle from have WHERE ArtistName like ? AND Matched IS NULL',
                [artist['ArtistName']])
            havetracks = len(matched_tracks) + len(unmatched_tracks)
            myDB.action('UPDATE artists SET HaveTracks=? WHERE ArtistID=?',
                        [havetracks, artist['ArtistID']])

        logger.info('Found %i new artists' % len(artist_list))

        if artist_list:
            if headphones.ADD_ARTISTS:
                logger.info('Importing %i new artists' % len(artist_list))
                importer.artistlist_to_mbids(artist_list)
            else:
                logger.info('To add these artists, go to Manage->Manage New Artists')
                myDB.action('DELETE from newartists')
                for artist in artist_list:
                    myDB.action('INSERT into newartists VALUES (?)', [artist])

        # Guard against an empty list: without it a scan that found no
        # readable bitrates raises ZeroDivisionError.
        if headphones.DETECT_BITRATE and bitrates:
            headphones.PREFERRED_BITRATE = sum(bitrates) / len(bitrates) / 1000

    else:
        # If we're appending a new album to the database, update the artists
        # total track counts
        logger.info('Updating artist track counts')

        matched_tracks = myDB.select(
            'SELECT TrackTitle from tracks WHERE ArtistID=? AND Location IS NOT NULL',
            [ArtistID])
        unmatched_tracks = myDB.select(
            'SELECT TrackTitle from have WHERE ArtistName like ? AND Matched IS NULL',
            [ArtistName])
        havetracks = len(matched_tracks) + len(unmatched_tracks)
        myDB.action('UPDATE artists SET HaveTracks=? WHERE ArtistID=?', [havetracks, ArtistID])

    update_album_status()
    logger.info('Library scan complete')
def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None, cron=False,
                artistScan=False):
    """Scan *dir* for media files and match them against the database.

    Phases:

    1. (when not *append*) Clear Location/BitRate/Format on ``tracks`` and
       ``alltracks`` rows whose file is gone and delete ``have`` rows whose
       file is gone.
    2. Walk *dir* (honouring the IGNORED_FOLDERS / IGNORED_FILES settings),
       read every media file's tags and upsert them into ``have`` keyed by
       Location. New files and files whose artist/album/title changed are
       flagged for matching (Matched NULL).
    3. For every unmatched ``have`` row below *dir*, look for a matching row
       in ``tracks`` and ``alltracks`` (artist/album/title first, CleanName
       as fallback). ``have.Matched`` becomes the album id on success and
       "Failed" otherwise.
    4. Refresh HaveTracks counts and import or queue unknown artists.

    Args:
        dir: Directory to scan; defaults to ``headphones.CONFIG.MUSIC_DIR``.
            Per the original comment, a directory passed with ``append=True``
            comes from the post processor and is already a byte string.
        append: True when a single directory is being added rather than the
            whole library being rescanned.
        ArtistID: Artist whose HaveTracks count is refreshed in append mode.
        ArtistName: Name of that artist, used to count ``have`` rows.
        cron: True when called by the scheduler; the scan is then skipped
            unless ``headphones.CONFIG.LIBRARYSCAN`` is enabled.
        artistScan: True when the scan is for one artist; *dir* is then
            encoded like in a full scan and the scanned artists' counts are
            refreshed even in append mode.

    Returns:
        None.
    """
    if cron and not headphones.CONFIG.LIBRARYSCAN:
        return

    if not dir:
        if not headphones.CONFIG.MUSIC_DIR:
            return
        dir = headphones.CONFIG.MUSIC_DIR

    # If we're appending a dir, it's coming from the post processor which is
    # already bytestring
    if not append or artistScan:
        dir = dir.encode(headphones.SYS_ENCODING)

    # Unicode form of the directory for log messages and SQL parameters.
    display_dir = dir.decode(headphones.SYS_ENCODING, 'replace')

    if not os.path.isdir(dir):
        logger.warn('Cannot find directory: %s. Not scanning' % display_dir)
        return

    myDB = db.DBConnection()
    new_artists = []

    logger.info('Scanning music directory: %s' % display_dir)

    if not append:
        # Clean up bad filepaths
        tracks = myDB.select(
            'SELECT Location from alltracks WHERE Location IS NOT NULL UNION SELECT Location from tracks WHERE Location IS NOT NULL')

        for track in tracks:
            encoded_track_string = track['Location'].encode(headphones.SYS_ENCODING, 'replace')
            if not os.path.isfile(encoded_track_string):
                myDB.action('UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE Location=?',
                            [None, None, None, track['Location']])
                myDB.action('UPDATE alltracks SET Location=?, BitRate=?, Format=? WHERE Location=?',
                            [None, None, None, track['Location']])

        del_have_tracks = myDB.select('SELECT Location, Matched, ArtistName from have')

        for track in del_have_tracks:
            encoded_track_string = track['Location'].encode(headphones.SYS_ENCODING, 'replace')
            if not os.path.isfile(encoded_track_string):
                if track['ArtistName']:
                    # Make sure deleted files get accounted for when updating
                    # artist track counts
                    new_artists.append(track['ArtistName'])
                myDB.action('DELETE FROM have WHERE Location=?', [track['Location']])
                logger.info(
                    'File %s removed from Headphones, as it is no longer on disk'
                    % encoded_track_string.decode(headphones.SYS_ENCODING, 'replace'))

    bitrates = []
    new_song_count = 0

    # MEDIA_FORMATS = music file extensions, e.g. mp3, flac, etc
    media_suffixes = tuple('.' + ext.lower() for ext in headphones.MEDIA_FORMATS)

    # Subdirectory (relative to dir) of the previous media file. Tracking it
    # directly keeps the "Now scanning subdirectory" log correct even when
    # files are skipped; starting at '' means files in the root itself are
    # not announced.
    previous_subdirectory = ''

    for root, subdirs, filenames in helpers.walk_directory(dir):
        # Filter paths based on config. Note that these methods work directly
        # on the inputs
        helpers.path_filter_patterns(subdirs, headphones.CONFIG.IGNORED_FOLDERS, root)
        helpers.path_filter_patterns(filenames, headphones.CONFIG.IGNORED_FILES, root)

        for filename in filenames:
            if not filename.lower().endswith(media_suffixes):
                continue

            # Strip only the leading dir; str.replace would also remove any
            # later occurrence of the same text inside the path.
            subdirectory = root[len(dir):] if root.startswith(dir) else root
            if subdirectory != previous_subdirectory:
                logger.info("[%s] Now scanning subdirectory %s" % (
                    display_dir,
                    subdirectory.decode(headphones.SYS_ENCODING, 'replace')))
                previous_subdirectory = subdirectory

            song = os.path.join(root, filename)

            # We need the unicode path to use for logging, inserting into database
            unicode_song_path = song.decode(headphones.SYS_ENCODING, 'replace')

            # Try to read the metadata
            try:
                media = MediaFile(song)
            except (FileTypeError, UnreadableFileError):
                logger.warning(
                    "Cannot read media file '%s', skipping. It may be corrupted or not a media file.",
                    unicode_song_path)
                continue
            except IOError:
                logger.warning("Cannnot read media file '%s', skipping. Does the file exists?",
                               unicode_song_path)
                continue

            # Grab the bitrates for the auto detect bit rate option
            if media.bitrate:
                bitrates.append(media.bitrate)

            # Use the album artist over the artist if available
            f_artist = media.albumartist or media.artist or None

            # CleanName is only meaningful with artist, album and title.
            if f_artist and media.album and media.title:
                CleanName = helpers.clean_name(f_artist + ' ' + media.album + ' ' + media.title)
            else:
                CleanName = None

            controlValueDict = {'Location': unicode_song_path}
            newValueDict = {
                'TrackID': media.mb_trackid,
                'ArtistName': f_artist,
                'AlbumTitle': media.album,
                'TrackNumber': media.track,
                'TrackLength': media.length,
                'Genre': media.genre,
                'Date': media.date,
                'TrackTitle': media.title,
                'BitRate': media.bitrate,
                'Format': media.format,
                'CleanName': CleanName,
            }

            check_exist_song = myDB.action("SELECT * FROM have WHERE Location=?",
                                           [unicode_song_path]).fetchone()

            # Only attempt to match songs that are new, haven't yet been
            # matched, or whose metadata has changed.
            if not check_exist_song:
                # This is a new track
                if f_artist:
                    new_artists.append(f_artist)
                myDB.upsert("have", newValueDict, controlValueDict)
                new_song_count += 1
            elif (check_exist_song['ArtistName'] != f_artist
                    or check_exist_song['AlbumTitle'] != media.album
                    or check_exist_song['TrackTitle'] != media.title):
                # Important track metadata has been modified, need to run the
                # matcher again - unless there is no artist, or the artist is
                # unchanged and the row was marked "Ignored".
                if f_artist and (f_artist != check_exist_song['ArtistName']
                                 or check_exist_song['Matched'] != "Ignored"):
                    new_artists.append(f_artist)
                else:
                    continue

                newValueDict['Matched'] = None
                myDB.upsert("have", newValueDict, controlValueDict)
                # Detach the file from whatever it was matched to before.
                myDB.action(
                    'UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE Location=?',
                    [None, None, None, unicode_song_path])
                myDB.action(
                    'UPDATE alltracks SET Location=?, BitRate=?, Format=? WHERE Location=?',
                    [None, None, None, unicode_song_path])
                new_song_count += 1
            else:
                # This track information hasn't changed
                if f_artist and check_exist_song['Matched'] != "Ignored":
                    new_artists.append(f_artist)

    # Now we start track matching
    logger.info("%s new/modified songs found and added to the database" % new_song_count)

    location_prefix = display_dir + "%"
    song_list = myDB.action("SELECT * FROM have WHERE Matched IS NULL AND LOCATION LIKE ?",
                            [location_prefix])
    total_number_of_songs = myDB.action(
        "SELECT COUNT(*) FROM have WHERE Matched IS NULL AND LOCATION LIKE ?",
        [location_prefix]).fetchone()[0]
    logger.info("Found " + str(total_number_of_songs) + " new/modified tracks in: '" + display_dir
                + "'. Matching tracks to the appropriate releases....")

    # Sort by artist, then album, so the "Now matching songs by ..." log line
    # below is emitted once per run of songs by the same artist.
    song_list = helpers.multikeysort(song_list, ['ArtistName', 'AlbumTitle'])

    # We'll use this to give a % completion, just because the track matching
    # might take a while
    previous_artist = None
    last_completion_percentage = 0

    for song_index, song in enumerate(song_list):
        if song_index == 0 or song['ArtistName'] != previous_artist:
            logger.info("Now matching songs by %s" % song['ArtistName'])
        previous_artist = song['ArtistName']

        # Percentage floored to one decimal, reported roughly every 10%.
        completion_percentage = math.floor(
            float(song_index + 1) / total_number_of_songs * 1000) / 10

        if completion_percentage >= (last_completion_percentage + 10):
            logger.info("Track matching is " + str(completion_percentage) + "% complete")
            last_completion_percentage = completion_percentage

        # Matching is deliberately limited to [artist/album/title] with a
        # CleanName fallback. Per the original author's note, the more
        # specific clauses (artist id / release id) were never hit during a
        # library scan, and the other matching functions (importer.py, mb.py)
        # do the same, so this keeps them consistent.
        have_key = {'Location': song['Location']}

        if not (song['ArtistName'] and song['AlbumTitle'] and song['TrackTitle']):
            # Not enough metadata to match on.
            myDB.upsert("have", {'Matched': "Failed"}, dict(have_key))
            continue

        file_info = {'Location': song['Location'],
                     'BitRate': song['BitRate'],
                     'Format': song['Format']}
        title_params = [song['ArtistName'], song['AlbumTitle'], song['TrackTitle']]

        # --- tracks ---
        track = myDB.action(
            'SELECT ArtistName, AlbumTitle, TrackTitle, AlbumID from tracks WHERE ArtistName LIKE ? AND AlbumTitle LIKE ? AND TrackTitle LIKE ?',
            title_params).fetchone()
        if track:
            myDB.upsert("tracks", dict(file_info), {'ArtistName': track['ArtistName'],
                                                    'AlbumTitle': track['AlbumTitle'],
                                                    'TrackTitle': track['TrackTitle']})
            myDB.upsert("have", {'Matched': track['AlbumID']}, dict(have_key))
        else:
            track = myDB.action('SELECT CleanName, AlbumID from tracks WHERE CleanName LIKE ?',
                                [song['CleanName']]).fetchone()
            if track:
                myDB.upsert("tracks", dict(file_info), {'CleanName': track['CleanName']})
                myDB.upsert("have", {'Matched': track['AlbumID']}, dict(have_key))
            else:
                # Provisional: overwritten below if alltracks has a match.
                myDB.upsert("have", {'Matched': "Failed"}, dict(have_key))

        # --- alltracks ---
        # alltracks may not exist if the album was added manually. At this
        # point have.Matched already holds either the tracks album id or
        # "Failed", so a miss here needs no further write.
        alltrack = myDB.action(
            'SELECT ArtistName, AlbumTitle, TrackTitle, AlbumID from alltracks WHERE ArtistName LIKE ? AND AlbumTitle LIKE ? AND TrackTitle LIKE ?',
            title_params).fetchone()
        if alltrack:
            myDB.upsert("alltracks", dict(file_info), {'ArtistName': alltrack['ArtistName'],
                                                       'AlbumTitle': alltrack['AlbumTitle'],
                                                       'TrackTitle': alltrack['TrackTitle']})
            myDB.upsert("have", {'Matched': alltrack['AlbumID']}, dict(have_key))
        else:
            alltrack = myDB.action(
                'SELECT CleanName, AlbumID from alltracks WHERE CleanName LIKE ?',
                [song['CleanName']]).fetchone()
            if alltrack:
                myDB.upsert("alltracks", dict(file_info), {'CleanName': alltrack['CleanName']})
                myDB.upsert("have", {'Matched': alltrack['AlbumID']}, dict(have_key))

    logger.info('Completed matching tracks from directory: %s' % display_dir)

    if not append or artistScan:
        logger.info('Updating scanned artist track counts')

        # Clean up the new artist list
        unique_artists = list(dict.fromkeys(new_artists))
        current_artists = myDB.select('SELECT ArtistName, ArtistID from artists')

        # Names are compared through clean_name because artists with special
        # characters (-,') used to show up as new. The normalised set is built
        # once, and only when there is something to compare against it.
        if unique_artists:
            known_names = set(helpers.clean_name(row[0]).lower() for row in current_artists)
        else:
            known_names = set()

        artist_list = []       # scanned artists not yet in the database
        artists_checked = []   # scanned artists already in the database
        for name in unique_artists:
            if helpers.clean_name(name).lower() in known_names:
                artists_checked.append(name)
            else:
                artist_list.append(name)

        # Update track counts
        for artist in artists_checked:
            # Have tracks are selected from tracks table and not all tracks
            # because of duplicates. We update the track count upon an album
            # switch to complement this.
            matched_tracks = myDB.select(
                'SELECT TrackTitle from tracks WHERE ArtistName like ? AND Location IS NOT NULL',
                [artist])
            # "Failed" is bound as a parameter rather than written as a
            # double-quoted SQL token, which SQLite resolves as an identifier
            # first and only falls back to a string literal.
            failed_tracks = myDB.select(
                'SELECT TrackTitle from have WHERE ArtistName like ? AND Matched = ?',
                [artist, "Failed"])
            # Note: some people complain about having "artist have tracks" >
            # number of tracks total in the artist's official releases (can be
            # fixed by dropping failed_tracks from the sum).
            havetracks = len(matched_tracks) + len(failed_tracks)
            myDB.action('UPDATE artists SET HaveTracks=? WHERE ArtistName=?',
                        [havetracks, artist])

        logger.info('Found %i new artists' % len(artist_list))

        if artist_list:
            if headphones.CONFIG.AUTO_ADD_ARTISTS:
                logger.info('Importing %i new artists' % len(artist_list))
                importer.artistlist_to_mbids(artist_list)
            else:
                logger.info('To add these artists, go to Manage->Manage New Artists')
                for artist in artist_list:
                    myDB.action('INSERT OR IGNORE INTO newartists VALUES (?)', [artist])

        if headphones.CONFIG.DETECT_BITRATE and bitrates:
            headphones.CONFIG.PREFERRED_BITRATE = sum(bitrates) / len(bitrates) / 1000

    else:
        # If we're appending a new album to the database, update the artists
        # total track counts
        logger.info('Updating artist track counts')

        matched_tracks = myDB.select(
            'SELECT TrackTitle from tracks WHERE ArtistID=? AND Location IS NOT NULL',
            [ArtistID])
        failed_tracks = myDB.select(
            'SELECT TrackTitle from have WHERE ArtistName like ? AND Matched = ?',
            [ArtistName, "Failed"])
        havetracks = len(matched_tracks) + len(failed_tracks)
        myDB.action('UPDATE artists SET HaveTracks=? WHERE ArtistID=?', [havetracks, ArtistID])

    if not append:
        update_album_status()

    if not append and not artistScan:
        lastfm.getSimilar()

    logger.info('Library scan complete')
def libraryScan(dir=None):
    """Scan a music directory and sync the tracks/have tables with it.

    Steps, in order:
      1. Clear Location/BitRate/Format on tracks whose file is gone.
      2. Empty the ``have`` table and walk ``dir``; every media file is
         either linked to an existing row in ``tracks`` or inserted
         into ``have`` as an unmatched track.
      3. For tracks still lacking a Location, build the expected path
         from FOLDER_FORMAT / FILE_FORMAT and glob for it.
      4. Refresh HaveTracks for every artist and import (or stash in
         ``headphones.NEW_ARTISTS``) artists not yet in the database.
      5. Optionally derive PREFERRED_BITRATE from the scanned files.

    dir -- directory to scan; defaults to ``headphones.MUSIC_DIR``.

    Returns None. Returns early (after a warning) when ``dir`` is not
    a directory.
    """
    if not dir:
        dir = headphones.MUSIC_DIR

    try:
        dir = str(dir)
    except UnicodeEncodeError:
        dir = unicode(dir).encode('unicode_escape')

    if not os.path.isdir(dir):
        logger.warn('Cannot find directory: %s. Not scanning' % dir)
        return

    myDB = db.DBConnection()

    # Clean up bad filepaths
    tracks = myDB.select('SELECT Location, TrackID from tracks WHERE Location IS NOT NULL')
    for track in tracks:
        if not os.path.isfile(track['Location'].encode(headphones.SYS_ENCODING)):
            myDB.action('UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE TrackID=?',
                        [None, None, None, track['TrackID']])

    logger.info('Scanning music directory: %s' % dir)

    new_artists = []
    bitrates = []

    myDB.action('DELETE from have')

    for root, _dirnames, filenames in os.walk(dir):
        for filename in filenames:
            # MEDIA_FORMATS = music file extensions, e.g. mp3, flac, etc
            if not any(filename.lower().endswith('.' + x.lower()) for x in headphones.MEDIA_FORMATS):
                continue

            song = os.path.join(root, filename)
            # Unicode form of the path, used for logging and the database
            song_path = unicode(song, headphones.SYS_ENCODING, errors='replace')

            # Try to read the metadata
            try:
                media = MediaFile(song)
            except Exception:
                logger.error('Cannot read file: ' + song_path)
                continue

            # Grab the bitrates for the auto detect bit rate option
            if media.bitrate:
                bitrates.append(media.bitrate)

            # Prefer the album artist; skip files with no artist at all
            if media.albumartist:
                f_artist = media.albumartist
            elif media.artist:
                f_artist = media.artist
            else:
                continue

            # Try to find a match based on artist/album/tracktitle
            if f_artist and media.album and media.title:
                track = myDB.action(
                    'SELECT TrackID from tracks WHERE CleanName LIKE ?',
                    [helpers.cleanName(f_artist + ' ' + media.album + ' ' + media.title)]).fetchone()

                if not track:
                    track = myDB.action(
                        'SELECT TrackID from tracks WHERE ArtistName LIKE ? AND AlbumTitle LIKE ? AND TrackTitle LIKE ?',
                        [f_artist, media.album, media.title]).fetchone()

                if track:
                    myDB.action('UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE TrackID=?',
                                [song_path, media.bitrate, media.format, track['TrackID']])
                    continue

            # Try to match on mbid if available and we couldn't find a match based on metadata
            if media.mb_trackid:
                track = myDB.action('SELECT TrackID from tracks WHERE TrackID=?',
                                    [media.mb_trackid]).fetchone()
                if track:
                    myDB.action('UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE TrackID=?',
                                [song_path, media.bitrate, media.format, track['TrackID']])
                    continue

            # No track-level match: it might be a new artist or a non-mb release
            new_artists.append(f_artist)

            # The have table holds unmatched tracks (no associated row in tracks)
            myDB.action(
                'INSERT INTO have (ArtistName, AlbumTitle, TrackNumber, TrackTitle, TrackLength, BitRate, Genre, Date, TrackID, Location, CleanName, Format) VALUES( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                [f_artist, media.album, media.track, media.title, media.length,
                 media.bitrate, media.genre, media.date, media.mb_trackid, song_path,
                 helpers.cleanName(f_artist + ' ' + media.album + ' ' + media.title),
                 media.format])

    logger.info('Completed scanning of directory: %s' % dir)
    logger.info('Checking filepaths to see if we can find any matches')

    # Now check empty file paths to see if we can find a match based on their folder format
    tracks = myDB.select('SELECT * from tracks WHERE Location IS NULL')
    for track in tracks:

        release = myDB.action('SELECT * from albums WHERE AlbumID=?', [track['AlbumID']]).fetchone()
        if not release:
            # No album row for this track: nothing to build a path from.
            # Previously this crashed on the first release[...] subscript.
            logger.debug('No album found for track %s, skipping' % track['TrackID'])
            continue

        try:
            year = release['ReleaseDate'][:4]
        except TypeError:
            # ReleaseDate is NULL
            year = ''

        artist = release['ArtistName'].replace('/', '_')
        album = release['AlbumTitle'].replace('/', '_')

        if release['ArtistName'].startswith('The '):
            sortname = release['ArtistName'][4:]
        else:
            sortname = release['ArtistName']

        if sortname.isdigit():
            firstchar = '0-9'
        else:
            firstchar = sortname[0]

        lowerfirst = firstchar.lower()

        albumvalues = {
            'artist': artist,
            'album': album,
            'year': year,
            'first': firstchar,
            'lowerfirst': lowerfirst
        }

        folder = helpers.replace_all(headphones.FOLDER_FORMAT, albumvalues)
        folder = folder.replace('./', '_/').replace(':', '_').replace('?', '_')

        if folder.endswith('.'):
            # NOTE(review): this replaces EVERY '.' in the folder, not only
            # the trailing one. Left unchanged because the result presumably
            # has to match how the folders were created elsewhere - confirm
            # before changing.
            folder = folder.replace(folder[len(folder) - 1], '_')

        if not track['TrackNumber']:
            tracknumber = ''
        else:
            tracknumber = '%02d' % track['TrackNumber']

        trackvalues = {
            'tracknumber': tracknumber,
            'title': track['TrackTitle'],
            'artist': release['ArtistName'],
            'album': release['AlbumTitle'],
            'year': year
        }

        # '.*' lets glob match any file extension
        new_file_name = helpers.replace_all(headphones.FILE_FORMAT, trackvalues).replace('/', '_') + '.*'
        new_file_name = new_file_name.replace('?', '_').replace(':', '_')

        full_path_to_file = os.path.normpath(
            os.path.join(headphones.MUSIC_DIR, folder, new_file_name)
        ).encode(headphones.SYS_ENCODING, 'replace')

        match = glob.glob(full_path_to_file)
        if not match:
            continue

        logger.info('Found a match: %s. Writing MBID to metadata' % match[0])

        unipath = unicode(match[0], headphones.SYS_ENCODING, errors='replace')

        myDB.action('UPDATE tracks SET Location=? WHERE TrackID=?', [unipath, track['TrackID']])
        myDB.action('DELETE from have WHERE Location=?', [unipath])

        # Try to insert the appropriate track id so we don't have to keep doing this
        try:
            media = MediaFile(match[0])
            media.mb_trackid = track['TrackID']
            media.save()
            myDB.action('UPDATE tracks SET BitRate=?, Format=? WHERE TrackID=?',
                        [media.bitrate, media.format, track['TrackID']])
            logger.debug('Wrote mbid to track: %s' % match[0])
        except Exception:
            logger.error('Error embedding track id into: %s' % match[0])
            continue

    logger.info('Done checking empty filepaths')
    logger.info('Done syncing library with directory: %s' % dir)

    # Clean up the new artist list
    unique_artists = {}.fromkeys(new_artists).keys()
    current_artists = myDB.select('SELECT ArtistName, ArtistID from artists')

    # Build the lookup once instead of once per candidate artist
    if unique_artists:
        known_artists = set(x[0].lower() for x in current_artists)
    else:
        known_artists = set()
    artist_list = [name for name in unique_artists if name.lower() not in known_artists]

    # Update track counts
    logger.info('Updating track counts')

    for artist in current_artists:
        matched = myDB.select(
            'SELECT TrackTitle from tracks WHERE ArtistID like ? AND Location IS NOT NULL',
            [artist['ArtistID']])
        unmatched = myDB.select(
            'SELECT TrackTitle from have WHERE ArtistName like ?',
            [artist['ArtistName']])
        havetracks = len(matched) + len(unmatched)
        myDB.action('UPDATE artists SET HaveTracks=? WHERE ArtistID=?',
                    [havetracks, artist['ArtistID']])

    logger.info('Found %i new artists' % len(artist_list))

    if artist_list:
        if headphones.ADD_ARTISTS:
            logger.info('Importing %i new artists' % len(artist_list))
            importer.artistlist_to_mbids(artist_list)
        else:
            logger.info('To add these artists, go to Manage->Manage New Artists')
            headphones.NEW_ARTISTS = artist_list

    # Guard against an empty list: a scan that finds no readable media
    # would otherwise divide by zero here.
    if headphones.DETECT_BITRATE and bitrates:
        headphones.PREFERRED_BITRATE = sum(bitrates) / len(bitrates) / 1000
def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None, cron=False):
    """Scan a music directory, record its files in ``have`` and match them.

    Steps, in order:
      1. (full scan only) Clear Location/BitRate/Format in ``tracks`` and
         ``alltracks`` for files that no longer exist, and delete
         vanished files from ``have``.
      2. Walk ``dir`` (skipping dot-directories) and upsert each media
         file into ``have``; files whose artist/album/title changed get
         ``Matched`` reset so they are matched again.
      3. Match every ``have`` row under ``dir`` with ``Matched IS NULL``
         against ``tracks`` and ``alltracks`` (artist/album/title first,
         then CleanName) and store the AlbumID or "Failed" in ``Matched``.
      4. Update HaveTracks: for the scanned artists on a full scan, or
         for ``ArtistID`` only when appending.
      5. (full scan only) Import/queue new artists, optionally derive
         PREFERRED_BITRATE, then call ``update_album_status()`` and
         ``lastfm.getSimilar()``.

    dir        -- directory to scan; defaults to ``headphones.MUSIC_DIR``.
                  Expected to be unicode unless ``append`` is set, in
                  which case it is already a bytestring.
    append     -- True when a single new album directory is being added.
    ArtistID   -- artist whose HaveTracks is refreshed when appending.
    ArtistName -- artist name used for the ``have`` count when appending.
    cron       -- True when invoked by the scheduler; the scan is
                  skipped unless ``headphones.LIBRARYSCAN`` is set.

    Returns None.
    """
    if cron and not headphones.LIBRARYSCAN:
        return

    if not dir:
        if not headphones.MUSIC_DIR:
            return
        dir = headphones.MUSIC_DIR

    # If we're appending a dir, it's coming from the post processor which is
    # already bytestring
    if not append:
        dir = dir.encode(headphones.SYS_ENCODING)

    unicode_dir = dir.decode(headphones.SYS_ENCODING, 'replace')

    if not os.path.isdir(dir):
        logger.warn('Cannot find directory: %s. Not scanning' % unicode_dir)
        return

    myDB = db.DBConnection()
    new_artists = []

    def set_matched(location, matched):
        # Record the matching outcome for one file in the have table
        myDB.upsert("have", {'Matched': matched}, {'Location': location})

    def file_values(row):
        # Columns copied from a have row onto a matched tracks/alltracks row
        return {
            'Location': row['Location'],
            'BitRate': row['BitRate'],
            'Format': row['Format']
        }

    logger.info('Scanning music directory: %s' % unicode_dir)

    if not append:
        # Clean up bad filepaths
        tracks = myDB.select(
            'SELECT Location from alltracks WHERE Location IS NOT NULL UNION SELECT Location from tracks WHERE Location IS NOT NULL'
        )

        for track in tracks:
            # 'replace' keeps an unencodable path from aborting the whole scan
            # (same handling as the have loop below)
            encoded_track_string = track['Location'].encode(headphones.SYS_ENCODING, 'replace')
            if not os.path.isfile(encoded_track_string):
                myDB.action(
                    'UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE Location=?',
                    [None, None, None, track['Location']])
                myDB.action(
                    'UPDATE alltracks SET Location=?, BitRate=?, Format=? WHERE Location=?',
                    [None, None, None, track['Location']])

        del_have_tracks = myDB.select('SELECT Location, Matched, ArtistName from have')

        for track in del_have_tracks:
            encoded_track_string = track['Location'].encode(headphones.SYS_ENCODING, 'replace')
            if not os.path.isfile(encoded_track_string):
                if track['ArtistName']:
                    # Make sure deleted files get accounted for when updating artist track counts
                    new_artists.append(track['ArtistName'])
                myDB.action('DELETE FROM have WHERE Location=?', [track['Location']])
                logger.info(
                    'File %s removed from Headphones, as it is no longer on disk' %
                    encoded_track_string.decode(headphones.SYS_ENCODING, 'replace'))

    bitrates = []
    new_song_count = 0

    # Subdirectory of the previous media file, used to log each subdirectory
    # once. Tracked directly rather than through a list indexed by a counter,
    # because that counter was skipped by `continue` and drifted out of sync.
    previous_subdirectory = None

    for root, dirnames, filenames in os.walk(dir):
        # Prune hidden directories in place so os.walk does not descend into
        # them; iterate over a copy because we remove while looping.
        for directory in dirnames[:]:
            if directory.startswith("."):
                dirnames.remove(directory)

        for filename in filenames:
            # MEDIA_FORMATS = music file extensions, e.g. mp3, flac, etc
            if not any(filename.lower().endswith('.' + x.lower()) for x in headphones.MEDIA_FORMATS):
                continue

            subdirectory = root.replace(dir, '')
            if subdirectory != previous_subdirectory and not (
                    previous_subdirectory is None and subdirectory == ''):
                logger.info(
                    "[%s] Now scanning subdirectory %s" %
                    (unicode_dir, subdirectory.decode(headphones.SYS_ENCODING, 'replace')))
            previous_subdirectory = subdirectory

            song = os.path.join(root, filename)

            # We need the unicode path to use for logging, inserting into database
            unicode_song_path = song.decode(headphones.SYS_ENCODING, 'replace')

            # Try to read the metadata
            try:
                media = MediaFile(song)
            except (FileTypeError, UnreadableFileError):
                logger.error(
                    "Cannot read file media file '%s'. It may be corrupted or not a media file.",
                    unicode_song_path)
                continue

            # Grab the bitrates for the auto detect bit rate option
            if media.bitrate:
                bitrates.append(media.bitrate)

            # Use the album artist over the artist if available
            if media.albumartist:
                f_artist = media.albumartist
            elif media.artist:
                f_artist = media.artist
            else:
                f_artist = None

            # TODO: skip adding songs without the minimum requisite information
            if f_artist and media.album and media.title:
                CleanName = helpers.cleanName(f_artist + ' ' + media.album + ' ' + media.title)
            else:
                CleanName = None

            controlValueDict = {'Location': unicode_song_path}

            newValueDict = {
                'TrackID': media.mb_trackid,
                'ArtistName': f_artist,
                'AlbumTitle': media.album,
                'TrackNumber': media.track,
                'TrackLength': media.length,
                'Genre': media.genre,
                'Date': media.date,
                'TrackTitle': media.title,
                'BitRate': media.bitrate,
                'Format': media.format,
                'CleanName': CleanName
            }

            existing = myDB.action(
                "SELECT * FROM have WHERE Location=?", [unicode_song_path]).fetchone()

            # Only attempt to match songs that are new, haven't yet been
            # matched, or whose metadata has changed.
            if not existing:
                # This is a new track
                if f_artist:
                    new_artists.append(f_artist)
                myDB.upsert("have", newValueDict, controlValueDict)
                new_song_count += 1
                continue

            metadata_changed = (existing['ArtistName'] != f_artist or
                                existing['AlbumTitle'] != media.album or
                                existing['TrackTitle'] != media.title)

            if not metadata_changed:
                # This track information hasn't changed
                if f_artist and existing['Matched'] != "Ignored":
                    new_artists.append(f_artist)
                continue

            # Important track metadata has been modified, need to run the
            # matcher again - unless there is no artist, or the artist is
            # unchanged and the file was explicitly ignored.
            if f_artist and (f_artist != existing['ArtistName'] or
                             existing['Matched'] != "Ignored"):
                new_artists.append(f_artist)
            else:
                continue

            newValueDict['Matched'] = None
            myDB.upsert("have", newValueDict, controlValueDict)
            myDB.action(
                'UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE Location=?',
                [None, None, None, unicode_song_path])
            myDB.action(
                'UPDATE alltracks SET Location=?, BitRate=?, Format=? WHERE Location=?',
                [None, None, None, unicode_song_path])
            new_song_count += 1

    # Now we start track matching
    logger.info("%s new/modified songs found and added to the database" % new_song_count)
    song_list = myDB.action(
        "SELECT * FROM have WHERE Matched IS NULL AND LOCATION LIKE ?",
        [unicode_dir + "%"])
    total_number_of_songs = myDB.action(
        "SELECT COUNT(*) FROM have WHERE Matched IS NULL AND LOCATION LIKE ?",
        [unicode_dir + "%"]).fetchone()[0]
    logger.info("Found " + str(total_number_of_songs) + " new/modified tracks in: '" +
                unicode_dir + "'. Matching tracks to the appropriate releases....")

    # Group the songs by artist/album so the progress log reads sensibly
    song_list = helpers.multikeysort(song_list, ['ArtistName', 'AlbumTitle'])

    # We'll use this to give a % completion, just because the track matching might take a while
    song_count = 0
    previous_artist = None

    for song in song_list:

        if song_count == 0 or song['ArtistName'] != previous_artist:
            logger.info("Now matching songs by %s" % song['ArtistName'])
        previous_artist = song['ArtistName']

        song_count += 1
        completion_percentage = float(song_count) / total_number_of_songs * 100

        if completion_percentage % 10 == 0:
            logger.info("Track matching is " + str(completion_percentage) + "% complete")

        # Matching is done on [artist/album/track] or CleanName only, which is
        # consistent with the other matching functions (importer.py, mb.py).
        if not (song['ArtistName'] and song['AlbumTitle'] and song['TrackTitle']):
            set_matched(song['Location'], "Failed")
            continue

        names = [song['ArtistName'], song['AlbumTitle'], song['TrackTitle']]

        # --- tracks table ---
        controlValueDict = None
        track = myDB.action(
            'SELECT ArtistName, AlbumTitle, TrackTitle, AlbumID from tracks WHERE ArtistName LIKE ? AND AlbumTitle LIKE ? AND TrackTitle LIKE ?',
            names).fetchone()
        if track:
            controlValueDict = {
                'ArtistName': track['ArtistName'],
                'AlbumTitle': track['AlbumTitle'],
                'TrackTitle': track['TrackTitle']
            }
        else:
            track = myDB.action(
                'SELECT CleanName, AlbumID from tracks WHERE CleanName LIKE ?',
                [song['CleanName']]).fetchone()
            if track:
                controlValueDict = {'CleanName': track['CleanName']}

        if track:
            myDB.upsert("tracks", file_values(song), controlValueDict)
            set_matched(song['Location'], track['AlbumID'])
        else:
            set_matched(song['Location'], "Failed")

        # --- alltracks table ---
        # A hit here overrides the value stored above. On a miss nothing is
        # written: alltracks may not exist if the album was added manually,
        # and the tracks pass above has always recorded an outcome already
        # (the old `if not have_updated` fallback could never run).
        controlValueDict = None
        alltrack = myDB.action(
            'SELECT ArtistName, AlbumTitle, TrackTitle, AlbumID from alltracks WHERE ArtistName LIKE ? AND AlbumTitle LIKE ? AND TrackTitle LIKE ?',
            names).fetchone()
        if alltrack:
            controlValueDict = {
                'ArtistName': alltrack['ArtistName'],
                'AlbumTitle': alltrack['AlbumTitle'],
                'TrackTitle': alltrack['TrackTitle']
            }
        else:
            alltrack = myDB.action(
                'SELECT CleanName, AlbumID from alltracks WHERE CleanName LIKE ?',
                [song['CleanName']]).fetchone()
            if alltrack:
                controlValueDict = {'CleanName': alltrack['CleanName']}

        if alltrack:
            myDB.upsert("alltracks", file_values(song), controlValueDict)
            set_matched(song['Location'], alltrack['AlbumID'])

    logger.info('Completed matching tracks from directory: %s' % unicode_dir)

    if not append:
        logger.info('Updating scanned artist track counts')

        # Clean up the new artist list
        unique_artists = {}.fromkeys(new_artists).keys()
        current_artists = myDB.select('SELECT ArtistName, ArtistID from artists')

        # Compare cleaned names: there was a bug where artists with special
        # characters (-,') would show up in new artists. The lookup is built
        # once instead of once per candidate.
        if unique_artists:
            known_artists = set(helpers.cleanName(x[0]).lower() for x in current_artists)
        else:
            known_artists = set()

        artist_list = []      # scanned artists not yet in the database
        artists_checked = []  # scanned artists already in the database
        for name in unique_artists:
            if helpers.cleanName(name).lower() in known_artists:
                artists_checked.append(name)
            else:
                artist_list.append(name)

        # Update track counts
        for artist in artists_checked:
            # Have tracks are selected from tracks table and not all tracks because of duplicates
            # We update the track count upon an album switch to compliment this
            matched = myDB.select(
                'SELECT TrackTitle from tracks WHERE ArtistName like ? AND Location IS NOT NULL',
                [artist])
            # Note, some people complain about having "artist have tracks" > # of
            # tracks total in artist official releases (dropping this second
            # count would fix that)
            unmatched = myDB.select(
                'SELECT TrackTitle from have WHERE ArtistName like ? AND Matched = "Failed"',
                [artist])
            havetracks = len(matched) + len(unmatched)
            myDB.action('UPDATE artists SET HaveTracks=? WHERE ArtistName=?',
                        [havetracks, artist])

        logger.info('Found %i new artists' % len(artist_list))

        if artist_list:
            if headphones.ADD_ARTISTS:
                logger.info('Importing %i new artists' % len(artist_list))
                importer.artistlist_to_mbids(artist_list)
            else:
                logger.info('To add these artists, go to Manage->Manage New Artists')
                for artist in artist_list:
                    myDB.action('INSERT OR IGNORE INTO newartists VALUES (?)', [artist])

        # Guard against an empty list: a scan that finds no readable media
        # would otherwise divide by zero here.
        if headphones.DETECT_BITRATE and bitrates:
            headphones.PREFERRED_BITRATE = sum(bitrates) / len(bitrates) / 1000

    else:
        # If we're appending a new album to the database, update the artists total track counts
        logger.info('Updating artist track counts')

        matched = myDB.select(
            'SELECT TrackTitle from tracks WHERE ArtistID=? AND Location IS NOT NULL',
            [ArtistID])
        unmatched = myDB.select(
            'SELECT TrackTitle from have WHERE ArtistName like ? AND Matched = "Failed"',
            [ArtistName])
        havetracks = len(matched) + len(unmatched)
        myDB.action('UPDATE artists SET HaveTracks=? WHERE ArtistID=?', [havetracks, ArtistID])

    if not append:
        update_album_status()
        lastfm.getSimilar()

    logger.info('Library scan complete')
def libraryScan(dir=None):
    """Scan a music directory and sync the tracks/have tables with it.

    Steps, in order:
      1. Clear Location/BitRate/Format on tracks whose file is gone.
      2. Empty the ``have`` table and walk ``dir``; every media file is
         either linked to an existing row in ``tracks`` or inserted
         into ``have`` as an unmatched track.
      3. Refresh HaveTracks for every artist and import (or store in
         the ``newartists`` table) artists not yet in the database.
      4. Optionally derive PREFERRED_BITRATE from the scanned files.

    dir -- directory to scan (it is encoded with SYS_ENCODING, so a
           unicode path is expected); defaults to
           ``headphones.MUSIC_DIR``.

    Returns None. Returns early (after a warning) when ``dir`` is not
    a directory.
    """
    if not dir:
        dir = headphones.MUSIC_DIR

    dir = dir.encode(headphones.SYS_ENCODING)

    if not os.path.isdir(dir):
        logger.warn('Cannot find directory: %s. Not scanning' % dir.decode(headphones.SYS_ENCODING))
        return

    myDB = db.DBConnection()

    # Clean up bad filepaths
    tracks = myDB.select('SELECT Location, TrackID from tracks WHERE Location IS NOT NULL')
    for track in tracks:
        if not os.path.isfile(track['Location'].encode(headphones.SYS_ENCODING)):
            myDB.action('UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE TrackID=?',
                        [None, None, None, track['TrackID']])

    logger.info('Scanning music directory: %s' % dir)

    new_artists = []
    bitrates = []

    myDB.action('DELETE from have')

    for root, _dirnames, filenames in os.walk(dir):
        for filename in filenames:
            # MEDIA_FORMATS = music file extensions, e.g. mp3, flac, etc
            if not any(filename.lower().endswith('.' + x.lower()) for x in headphones.MEDIA_FORMATS):
                continue

            song = os.path.join(root, filename)

            # We need the unicode path to use for logging, inserting into database
            unicode_song_path = song.decode(headphones.SYS_ENCODING, 'replace')

            # Try to read the metadata
            try:
                media = MediaFile(song)
            except Exception:
                logger.error('Cannot read file: ' + unicode_song_path)
                continue

            # Grab the bitrates for the auto detect bit rate option
            if media.bitrate:
                bitrates.append(media.bitrate)

            # Prefer the album artist; skip files with no artist at all
            if media.albumartist:
                f_artist = media.albumartist
            elif media.artist:
                f_artist = media.artist
            else:
                continue

            # Try to find a match based on artist/album/tracktitle
            if f_artist and media.album and media.title:
                track = myDB.action(
                    'SELECT TrackID from tracks WHERE CleanName LIKE ?',
                    [helpers.cleanName(f_artist + ' ' + media.album + ' ' + media.title)]).fetchone()

                if not track:
                    track = myDB.action(
                        'SELECT TrackID from tracks WHERE ArtistName LIKE ? AND AlbumTitle LIKE ? AND TrackTitle LIKE ?',
                        [f_artist, media.album, media.title]).fetchone()

                if track:
                    myDB.action('UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE TrackID=?',
                                [unicode_song_path, media.bitrate, media.format, track['TrackID']])
                    continue

            # Try to match on mbid if available and we couldn't find a match based on metadata
            if media.mb_trackid:
                track = myDB.action('SELECT TrackID from tracks WHERE TrackID=?',
                                    [media.mb_trackid]).fetchone()
                if track:
                    myDB.action('UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE TrackID=?',
                                [unicode_song_path, media.bitrate, media.format, track['TrackID']])
                    continue

            # No track-level match: it might be a new artist or a non-mb release
            new_artists.append(f_artist)

            # The have table holds unmatched tracks (no associated row in tracks)
            myDB.action(
                'INSERT INTO have (ArtistName, AlbumTitle, TrackNumber, TrackTitle, TrackLength, BitRate, Genre, Date, TrackID, Location, CleanName, Format) VALUES( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                [f_artist, media.album, media.track, media.title, media.length,
                 media.bitrate, media.genre, media.date, media.mb_trackid, unicode_song_path,
                 helpers.cleanName(f_artist + ' ' + media.album + ' ' + media.title),
                 media.format])

    logger.info('Completed scanning directory: %s' % dir)

    # Clean up the new artist list
    unique_artists = {}.fromkeys(new_artists).keys()
    current_artists = myDB.select('SELECT ArtistName, ArtistID from artists')

    # Build the lookup once instead of once per candidate artist
    if unique_artists:
        known_artists = set(x[0].lower() for x in current_artists)
    else:
        known_artists = set()
    artist_list = [name for name in unique_artists if name.lower() not in known_artists]

    # Update track counts
    logger.info('Updating track counts')

    for artist in current_artists:
        matched = myDB.select(
            'SELECT TrackTitle from tracks WHERE ArtistID like ? AND Location IS NOT NULL',
            [artist['ArtistID']])
        unmatched = myDB.select(
            'SELECT TrackTitle from have WHERE ArtistName like ?',
            [artist['ArtistName']])
        havetracks = len(matched) + len(unmatched)
        myDB.action('UPDATE artists SET HaveTracks=? WHERE ArtistID=?',
                    [havetracks, artist['ArtistID']])

    logger.info('Found %i new artists' % len(artist_list))

    if artist_list:
        if headphones.ADD_ARTISTS:
            logger.info('Importing %i new artists' % len(artist_list))
            importer.artistlist_to_mbids(artist_list)
        else:
            logger.info('To add these artists, go to Manage->Manage New Artists')
            # Replace the previous list of pending artists with this scan's
            myDB.action('DELETE from newartists')
            for artist in artist_list:
                myDB.action('INSERT into newartists VALUES (?)', [artist])

    # Guard against an empty list: a scan that finds no readable media
    # would otherwise divide by zero here.
    if headphones.DETECT_BITRATE and bitrates:
        headphones.PREFERRED_BITRATE = sum(bitrates) / len(bitrates) / 1000
def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None): if not dir: dir = headphones.MUSIC_DIR # If we're appending a dir, it's coming from the post processor which is # already bytestring if not append: dir = dir.encode(headphones.SYS_ENCODING) if not os.path.isdir(dir): logger.warn('Cannot find directory: %s. Not scanning' % dir.decode(headphones.SYS_ENCODING, 'replace')) return myDB = db.DBConnection() if not append: # Clean up bad filepaths tracks = myDB.select('SELECT Location, TrackID from tracks WHERE Location IS NOT NULL') for track in tracks: if not os.path.isfile(track['Location'].encode(headphones.SYS_ENCODING)): myDB.action('UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE TrackID=?', [None, None, None, track['TrackID']]) myDB.action('DELETE from have') logger.info('Scanning music directory: %s' % dir.decode(headphones.SYS_ENCODING, 'replace')) new_artists = [] bitrates = [] song_list = [] for r,d,f in os.walk(dir): for files in f: # MEDIA_FORMATS = music file extensions, e.g. mp3, flac, etc if any(files.lower().endswith('.' 
+ x.lower()) for x in headphones.MEDIA_FORMATS): song = os.path.join(r, files) # We need the unicode path to use for logging, inserting into database unicode_song_path = song.decode(headphones.SYS_ENCODING, 'replace') # Try to read the metadata try: f = MediaFile(song) except: logger.error('Cannot read file: ' + unicode_song_path) continue # Grab the bitrates for the auto detect bit rate option if f.bitrate: bitrates.append(f.bitrate) # Use the album artist over the artist if available if f.albumartist: f_artist = f.albumartist elif f.artist: f_artist = f.artist else: f_artist = None # Add the song to our song list - # TODO: skip adding songs without the minimum requisite information (just a matter of putting together the right if statements) song_dict = { 'TrackID' : f.mb_trackid, 'ReleaseID' : f.mb_albumid, 'ArtistName' : f_artist, 'AlbumTitle' : f.album, 'TrackNumber': f.track, 'TrackLength': f.length, 'Genre' : f.genre, 'Date' : f.date, 'TrackTitle' : f.title, 'BitRate' : f.bitrate, 'Format' : f.format, 'Location' : unicode_song_path } song_list.append(song_dict) # Now we start track matching total_number_of_songs = len(song_list) logger.info("Found " + str(total_number_of_songs) + " tracks in: '" + dir.decode(headphones.SYS_ENCODING, 'replace') + "'. Matching tracks to the appropriate releases....") # Sort the song_list by most vague (e.g. 
no trackid or releaseid) to most specific (both trackid & releaseid) # When we insert into the database, the tracks with the most specific information will overwrite the more general matches song_list = helpers.multikeysort(song_list, ['ReleaseID', 'TrackID']) # We'll use this to give a % completion, just because the track matching might take a while song_count = 0 for song in song_list: song_count += 1 completion_percentage = float(song_count)/total_number_of_songs * 100 if completion_percentage%10 == 0: logger.info("Track matching is " + str(completion_percentage) + "% complete") # If the track has a trackid & releaseid (beets: albumid) that the most surefire way # of identifying a track to a specific release so we'll use that first if song['TrackID'] and song['ReleaseID']: # Check both the tracks table & alltracks table in case they haven't populated the alltracks table yet track = myDB.action('SELECT TrackID, ReleaseID, AlbumID from alltracks WHERE TrackID=? AND ReleaseID=?', [song['TrackID'], song['ReleaseID']]).fetchone() # It might be the case that the alltracks table isn't populated yet, so maybe we can only find a match in the tracks table if not track: track = myDB.action('SELECT TrackID, ReleaseID, AlbumID from tracks WHERE TrackID=? 
AND ReleaseID=?', [song['TrackID'], song['ReleaseID']]).fetchone() if track: # Use TrackID & ReleaseID here since there can only be one possible match with a TrackID & ReleaseID query combo controlValueDict = { 'TrackID' : track['TrackID'], 'ReleaseID' : track['ReleaseID'] } # Insert it into the Headphones hybrid release (ReleaseID == AlbumID) hybridControlValueDict = { 'TrackID' : track['TrackID'], 'ReleaseID' : track['AlbumID'] } newValueDict = { 'Location' : song['Location'], 'BitRate' : song['BitRate'], 'Format' : song['Format'] } # Update both the tracks table and the alltracks table using the controlValueDict and hybridControlValueDict myDB.upsert("alltracks", newValueDict, controlValueDict) myDB.upsert("tracks", newValueDict, controlValueDict) myDB.upsert("alltracks", newValueDict, hybridControlValueDict) myDB.upsert("tracks", newValueDict, hybridControlValueDict) # Matched. Move on to the next one: continue # If we can't find it with TrackID & ReleaseID, next most specific will be # releaseid + tracktitle, although perhaps less reliable due to a higher # likelihood of variations in the song title (e.g. feat. artists) if song['ReleaseID'] and song['TrackTitle']: track = myDB.action('SELECT TrackID, ReleaseID, AlbumID from alltracks WHERE ReleaseID=? AND TrackTitle=?', [song['ReleaseID'], song['TrackTitle']]).fetchone() if not track: track = myDB.action('SELECT TrackID, ReleaseID, AlbumID from tracks WHERE ReleaseID=? 
AND TrackTitle=?', [song['ReleaseID'], song['TrackTitle']]).fetchone() if track: # There can also only be one match for this query as well (although it might be on both the tracks and alltracks table) # So use both TrackID & ReleaseID as the control values controlValueDict = { 'TrackID' : track['TrackID'], 'ReleaseID' : track['ReleaseID'] } hybridControlValueDict = { 'TrackID' : track['TrackID'], 'ReleaseID' : track['AlbumID'] } newValueDict = { 'Location' : song['Location'], 'BitRate' : song['BitRate'], 'Format' : song['Format'] } # Update both tables here as well myDB.upsert("alltracks", newValueDict, controlValueDict) myDB.upsert("tracks", newValueDict, controlValueDict) myDB.upsert("alltracks", newValueDict, hybridControlValueDict) myDB.upsert("tracks", newValueDict, hybridControlValueDict) # Done continue # Next most specific will be the opposite: a TrackID and an AlbumTitle # TrackIDs span multiple releases so if something is on an official album # and a compilation, for example, this will match it to the right one # However - there may be multiple matches here if song['TrackID'] and song['AlbumTitle']: # Even though there might be multiple matches, we just need to grab one to confirm a match track = myDB.action('SELECT TrackID, AlbumTitle from alltracks WHERE TrackID=? AND AlbumTitle LIKE ?', [song['TrackID'], song['AlbumTitle']]).fetchone() if not track: track = myDB.action('SELECT TrackID, AlbumTitle from tracks WHERE TrackID=? 
AND AlbumTitle LIKE ?', [song['TrackID'], song['AlbumTitle']]).fetchone() if track: # Don't need the hybridControlValueDict here since ReleaseID is not unique controlValueDict = { 'TrackID' : track['TrackID'], 'AlbumTitle' : track['AlbumTitle'] } newValueDict = { 'Location' : song['Location'], 'BitRate' : song['BitRate'], 'Format' : song['Format'] } myDB.upsert("alltracks", newValueDict, controlValueDict) myDB.upsert("tracks", newValueDict, controlValueDict) continue # Next most specific is the ArtistName + AlbumTitle + TrackTitle combo (but probably # even more unreliable than the previous queries, and might span multiple releases) if song['ArtistName'] and song['AlbumTitle'] and song['TrackTitle']: track = myDB.action('SELECT ArtistName, AlbumTitle, TrackTitle from alltracks WHERE ArtistName LIKE ? AND AlbumTitle LIKE ? AND TrackTitle LIKE ?', [song['ArtistName'], song['AlbumTitle'], song['TrackTitle']]).fetchone() if not track: track = myDB.action('SELECT ArtistName, AlbumTitle, TrackTitle from tracks WHERE ArtistName LIKE ? AND AlbumTitle LIKE ? 
AND TrackTitle LIKE ?', [song['ArtistName'], song['AlbumTitle'], song['TrackTitle']]).fetchone() if track: controlValueDict = { 'ArtistName' : track['ArtistName'], 'AlbumTitle' : track['AlbumTitle'], 'TrackTitle' : track['TrackTitle'] } newValueDict = { 'Location' : song['Location'], 'BitRate' : song['BitRate'], 'Format' : song['Format'] } myDB.upsert("alltracks", newValueDict, controlValueDict) myDB.upsert("tracks", newValueDict, controlValueDict) continue # Use the "CleanName" (ArtistName + AlbumTitle + TrackTitle stripped of punctuation, capitalization, etc) # This is more reliable than the former but requires some string manipulation so we'll do it only # if we can't find a match with the original data if song['ArtistName'] and song['AlbumTitle'] and song['TrackTitle']: CleanName = helpers.cleanName(song['ArtistName'] +' '+ song['AlbumTitle'] +' '+song['TrackTitle']) track = myDB.action('SELECT CleanName from alltracks WHERE CleanName LIKE ?', [CleanName]).fetchone() if not track: track = myDB.action('SELECT CleanName from tracks WHERE CleanName LIKE ?', [CleanName]).fetchone() if track: controlValueDict = { 'CleanName' : track['CleanName'] } newValueDict = { 'Location' : song['Location'], 'BitRate' : song['BitRate'], 'Format' : song['Format'] } myDB.upsert("alltracks", newValueDict, controlValueDict) myDB.upsert("tracks", newValueDict, controlValueDict) continue # Match on TrackID alone if we can't find it using any of the above methods. This method is reliable # but spans multiple releases - but that's why we're putting at the beginning as a last resort. 
If a track # with more specific information exists in the library, it'll overwrite these values if song['TrackID']: track = myDB.action('SELECT TrackID from alltracks WHERE TrackID=?', [song['TrackID']]).fetchone() if not track: track = myDB.action('SELECT TrackID from tracks WHERE TrackID=?', [song['TrackID']]).fetchone() if track: controlValueDict = { 'TrackID' : track['TrackID'] } newValueDict = { 'Location' : song['Location'], 'BitRate' : song['BitRate'], 'Format' : song['Format'] } myDB.upsert("alltracks", newValueDict, controlValueDict) myDB.upsert("tracks", newValueDict, controlValueDict) continue # if we can't find a match in the database on a track level, it might be a new artist or it might be on a non-mb release if song['ArtistName']: new_artists.append(song['ArtistName']) else: continue # The have table will become the new database for unmatched tracks (i.e. tracks with no associated links in the database if song['ArtistName'] and song['AlbumTitle'] and song['TrackTitle']: CleanName = helpers.cleanName(song['ArtistName'] +' '+ song['AlbumTitle'] +' '+song['TrackTitle']) else: continue myDB.action('INSERT INTO have (ArtistName, AlbumTitle, TrackNumber, TrackTitle, TrackLength, BitRate, Genre, Date, TrackID, Location, CleanName, Format) VALUES( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', [song['ArtistName'], song['AlbumTitle'], song['TrackNumber'], song['TrackTitle'], song['TrackLength'], song['BitRate'], song['Genre'], song['Date'], song['TrackID'], song['Location'], CleanName, song['Format']]) logger.info('Completed matching tracks from directory: %s' % dir.decode(headphones.SYS_ENCODING, 'replace')) if not append: # Clean up the new artist list unique_artists = {}.fromkeys(new_artists).keys() current_artists = myDB.select('SELECT ArtistName, ArtistID from artists') artist_list = [f for f in unique_artists if f.lower() not in [x[0].lower() for x in current_artists]] # Update track counts logger.info('Updating current artist track counts') for artist in 
current_artists: # Have tracks are selected from tracks table and not all tracks because of duplicates # We update the track count upon an album switch to compliment this havetracks = len(myDB.select('SELECT TrackTitle from tracks WHERE ArtistID=? AND Location IS NOT NULL', [artist['ArtistID']])) + len(myDB.select('SELECT TrackTitle from have WHERE ArtistName like ?', [artist['ArtistName']])) myDB.action('UPDATE artists SET HaveTracks=? WHERE ArtistID=?', [havetracks, artist['ArtistID']]) logger.info('Found %i new artists' % len(artist_list)) if len(artist_list): if headphones.ADD_ARTISTS: logger.info('Importing %i new artists' % len(artist_list)) importer.artistlist_to_mbids(artist_list) else: logger.info('To add these artists, go to Manage->Manage New Artists') myDB.action('DELETE from newartists') for artist in artist_list: myDB.action('INSERT into newartists VALUES (?)', [artist]) if headphones.DETECT_BITRATE: headphones.PREFERRED_BITRATE = sum(bitrates)/len(bitrates)/1000 else: # If we're appending a new album to the database, update the artists total track counts logger.info('Updating artist track counts') havetracks = len(myDB.select('SELECT TrackTitle from tracks WHERE ArtistID=? AND Location IS NOT NULL', [ArtistID])) + len(myDB.select('SELECT TrackTitle from have WHERE ArtistName like ?', [ArtistName])) myDB.action('UPDATE artists SET HaveTracks=? WHERE ArtistID=?', [havetracks, ArtistID])
def libraryScan(dir=None):
    """Synchronise the track database with the music files found under ``dir``.

    The scan runs in these stages:

    1. Clear Location/BitRate/Format on ``tracks`` rows whose file no longer
       exists on disk.
    2. Empty the ``have`` table, walk ``dir`` and read the tags of every file
       whose extension is listed in ``headphones.MEDIA_FORMATS``. A file that
       matches a ``tracks`` row (by clean name, by artist/album/title, or by
       the track id stored in its tags) updates that row; any other file is
       inserted into ``have`` and its artist is remembered as a candidate
       new artist.
    3. For every track that still has no location, build the expected path
       from ``headphones.FOLDER_FORMAT`` / ``headphones.FILE_FORMAT`` below
       ``headphones.MUSIC_DIR`` and glob for it. On a hit the location is
       stored and the track id is written into the file's tags.
    4. Recompute ``HaveTracks`` for every artist, then either import the new
       artists (``headphones.ADD_ARTISTS``) or publish them through
       ``headphones.NEW_ARTISTS``.
    5. When ``headphones.DETECT_BITRATE`` is set and at least one bitrate was
       read, store the average in ``headphones.PREFERRED_BITRATE``.

    Args:
        dir: Directory to scan. Falls back to ``headphones.MUSIC_DIR`` when
            falsy.

    Returns:
        None. Returns early, after logging a warning, when ``dir`` is not an
        existing directory.
    """
    if not dir:
        dir = headphones.MUSIC_DIR

    # Paths that str() cannot convert are escaped instead of aborting the scan
    try:
        dir = str(dir)
    except UnicodeEncodeError:
        dir = unicode(dir).encode('unicode_escape')

    if not os.path.isdir(dir):
        logger.warn('Cannot find directory: %s. Not scanning' % dir)
        return

    myDB = db.DBConnection()

    # Clean up bad filepaths
    tracks = myDB.select(
        'SELECT Location, TrackID from tracks WHERE Location IS NOT NULL')
    for track in tracks:
        encoded_location = track['Location'].encode(headphones.SYS_ENCODING)
        if not os.path.isfile(encoded_location):
            myDB.action(
                'UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE TrackID=?',
                [None, None, None, track['TrackID']])

    logger.info('Scanning music directory: %s' % dir)

    new_artists = []
    bitrates = []

    # The have table is rebuilt from scratch on every scan
    myDB.action('DELETE from have')

    # MEDIA_FORMATS = music file extensions, e.g. mp3, flac, etc
    # Built once here instead of once per file.
    media_extensions = tuple(
        '.' + ext.lower() for ext in headphones.MEDIA_FORMATS)

    for root, _dirs, filenames in os.walk(dir):
        for filename in filenames:
            if not filename.lower().endswith(media_extensions):
                continue

            song = os.path.join(root, filename)
            song_path = unicode(song, headphones.SYS_ENCODING,
                                errors='replace')

            # Try to read the metadata. Only Exception is caught so that
            # KeyboardInterrupt / SystemExit still stop the scan.
            try:
                media = MediaFile(song)
            except Exception:
                logger.error('Cannot read file: ' + song_path)
                continue

            # Grab the bitrates for the auto detect bit rate option
            if media.bitrate:
                bitrates.append(media.bitrate)

            # Prefer the album artist tag; files without any artist tag
            # cannot be matched and are skipped
            artist_name = media.albumartist or media.artist
            if not artist_name:
                continue

            # Try to find a match based on artist/album/tracktitle
            if media.album and media.title:
                clean_name = helpers.cleanName(
                    artist_name + ' ' + media.album + ' ' + media.title)
                track = myDB.action(
                    'SELECT TrackID from tracks WHERE CleanName LIKE ?',
                    [clean_name]).fetchone()

                if not track:
                    track = myDB.action(
                        'SELECT TrackID from tracks WHERE ArtistName LIKE ? AND AlbumTitle LIKE ? AND TrackTitle LIKE ?',
                        [artist_name, media.album, media.title]).fetchone()

                if track:
                    myDB.action(
                        'UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE TrackID=?',
                        [song_path, media.bitrate, media.format,
                         track['TrackID']])
                    continue

            # Try to match on mbid if available and we couldn't find a match
            # based on metadata
            if media.mb_trackid:
                track = myDB.action(
                    'SELECT TrackID from tracks WHERE TrackID=?',
                    [media.mb_trackid]).fetchone()

                if track:
                    myDB.action(
                        'UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE TrackID=?',
                        [song_path, media.bitrate, media.format,
                         track['TrackID']])
                    continue

            # If we can't find a match in the database on a track level, it
            # might be a new artist or it might be on a non-mb release
            new_artists.append(artist_name)

            # A missing album or title tag is None; substitute an empty
            # string so building the clean name cannot raise TypeError and
            # abort the whole scan.
            unmatched_clean_name = helpers.cleanName(
                artist_name + ' ' + (media.album or '') + ' ' +
                (media.title or ''))

            # The have table will become the new database for unmatched
            # tracks (i.e. tracks with no associated links in the database)
            myDB.action(
                'INSERT INTO have (ArtistName, AlbumTitle, TrackNumber, TrackTitle, TrackLength, BitRate, Genre, Date, TrackID, Location, CleanName, Format) VALUES( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                [artist_name, media.album, media.track, media.title,
                 media.length, media.bitrate, media.genre, media.date,
                 media.mb_trackid, song_path, unmatched_clean_name,
                 media.format])

    logger.info('Completed scanning of directory: %s' % dir)
    logger.info('Checking filepaths to see if we can find any matches')

    # Now check empty file paths to see if we can find a match based on their
    # folder format
    tracks = myDB.select('SELECT * from tracks WHERE Location IS NULL')
    for track in tracks:
        release = myDB.action('SELECT * from albums WHERE AlbumID=?',
                              [track['AlbumID']]).fetchone()

        # Without an album row there is nothing to build the path from
        if release is None:
            continue

        # Indexing a NULL ReleaseDate raises TypeError
        try:
            year = release['ReleaseDate'][:4]
        except TypeError:
            year = ''

        artist = release['ArtistName'].replace('/', '_')
        album = release['AlbumTitle'].replace('/', '_')
        releasetype = release['Type'].replace('/', '_')

        # Sort on the name without a leading article
        if release['ArtistName'].startswith('The '):
            sortname = release['ArtistName'][4:]
        else:
            sortname = release['ArtistName']

        if sortname.isdigit():
            firstchar = '0-9'
        else:
            # Slice instead of index: an empty sort name yields '' rather
            # than raising IndexError
            firstchar = sortname[:1]

        albumvalues = {
            '$Artist': artist,
            '$Album': album,
            '$Year': year,
            '$Type': releasetype,
            '$First': firstchar,
            '$artist': artist.lower(),
            '$album': album.lower(),
            '$year': year,
            '$type': releasetype.lower(),
            '$first': firstchar.lower()
        }

        folder = helpers.replace_all(headphones.FOLDER_FORMAT, albumvalues)
        folder = folder.replace('./', '_/').replace(':', '_').replace('?', '_')

        if folder.endswith('.'):
            # NOTE(review): this replaces every '.' in the folder, not only
            # the trailing one. Kept unchanged because found files must match
            # however the folders were originally named - confirm against the
            # code that creates them.
            folder = folder.replace('.', '_')

        if not track['TrackNumber']:
            tracknumber = ''
        else:
            tracknumber = '%02d' % track['TrackNumber']

        title = track['TrackTitle']

        trackvalues = {
            '$Track': tracknumber,
            '$Title': title,
            '$Artist': release['ArtistName'],
            '$Album': release['AlbumTitle'],
            '$Year': year,
            '$track': tracknumber,
            '$title': title.lower(),
            '$artist': release['ArtistName'].lower(),
            '$album': release['AlbumTitle'].lower(),
            '$year': year
        }

        # The extension is unknown, so match any with a trailing wildcard
        new_file_name = helpers.replace_all(
            headphones.FILE_FORMAT, trackvalues).replace('/', '_') + '.*'
        new_file_name = new_file_name.replace('?', '_').replace(':', '_')

        full_path_to_file = os.path.normpath(
            os.path.join(headphones.MUSIC_DIR, folder, new_file_name)
        ).encode(headphones.SYS_ENCODING, 'replace')

        match = glob.glob(full_path_to_file)
        if not match:
            continue

        logger.info('Found a match: %s. Writing MBID to metadata' % match[0])

        unipath = unicode(match[0], headphones.SYS_ENCODING, errors='replace')

        myDB.action('UPDATE tracks SET Location=? WHERE TrackID=?',
                    [unipath, track['TrackID']])
        myDB.action('DELETE from have WHERE Location=?', [unipath])

        # Try to insert the appropriate track id so we don't have to keep
        # doing this. Best effort: a failure is logged and the scan goes on.
        try:
            media = MediaFile(match[0])
            media.mb_trackid = track['TrackID']
            media.save()
            myDB.action(
                'UPDATE tracks SET BitRate=?, Format=? WHERE TrackID=?',
                [media.bitrate, media.format, track['TrackID']])
            logger.debug('Wrote mbid to track: %s' % match[0])
        except Exception:
            logger.error('Error embedding track id into: %s' % match[0])

    logger.info('Done checking empty filepaths')
    logger.info('Done syncing library with directory: %s' % dir)

    # Clean up the new artist list: drop duplicates, then drop artists that
    # are already in the database (compared case-insensitively)
    unique_artists = list(dict.fromkeys(new_artists))
    current_artists = myDB.select('SELECT ArtistName, ArtistID from artists')

    # Built once so each membership test is a set lookup
    known_artist_names = set(row[0].lower() for row in current_artists)
    artist_list = [name for name in unique_artists
                   if name.lower() not in known_artist_names]

    # Update track counts
    logger.info('Updating track counts')

    for artist in current_artists:
        matched_tracks = myDB.select(
            'SELECT TrackTitle from tracks WHERE ArtistID like ? AND Location IS NOT NULL',
            [artist['ArtistID']])
        unmatched_tracks = myDB.select(
            'SELECT TrackTitle from have WHERE ArtistName like ?',
            [artist['ArtistName']])
        havetracks = len(matched_tracks) + len(unmatched_tracks)
        myDB.action('UPDATE artists SET HaveTracks=? WHERE ArtistID=?',
                    [havetracks, artist['ArtistID']])

    logger.info('Found %i new artists' % len(artist_list))

    if artist_list:
        if headphones.ADD_ARTISTS:
            logger.info('Importing %i new artists' % len(artist_list))
            importer.artistlist_to_mbids(artist_list)
        else:
            logger.info(
                'To add these artists, go to Manage->Manage New Artists')
            headphones.NEW_ARTISTS = artist_list

    # Skip the average when no bitrate was collected (e.g. an empty
    # directory); dividing by len(bitrates) would raise ZeroDivisionError
    if headphones.DETECT_BITRATE and bitrates:
        headphones.PREFERRED_BITRATE = sum(bitrates) / len(bitrates) / 1000