class CacheManagerTestCase(unittest.TestCase): def setUp(self): self.cache = CacheManager(use_db=False) def test_non_existent_artist(self): self.assertIsNone(self.cache.get_artist('NQH')) def test_save_artist(self): pirexia = MockArtist('Pirexia') self.cache.add_artist(pirexia) self.assertIs(self.cache.get_artist('Pirexia'), pirexia) def test_save_album(self): fun_people = MockArtist('Fun People') kum_kum = MockAlbum('Kum Kum') self.cache.add_album(kum_kum, fun_people) self.assertIs(self.cache.get_album('Kum Kum', fun_people), kum_kum) def test_clear(self): eterna = MockArtist('Eterna Inocencia') ei = MockAlbum('EI') self.cache.add_artist(eterna) self.cache.add_album(ei, eterna) self.cache.clear() self.assertIsNone(self.cache.get_artist('Eterna Inocencia')) self.assertIsNone(self.cache.get_album('EI', eterna)) def test_no_ram_cache(self): cache = CacheManager(ram_cache=False, use_db=False) rudos = MockArtist('Rudos Wild') cache.add_artist(rudos) self.assertIsNone(cache.get_artist('Rudos Wild'))
class Indexer(object): VALID_FILE_EXTENSIONS = ( 'asf', 'wma', # ASF 'flac', # FLAC 'mp4', 'm4a', 'm4b', 'm4p', # M4A 'ape', # Monkey's Audio 'mp3', # MP3 'mpc', 'mp+', 'mpp', # Musepack 'spx', # Ogg Speex 'ogg', 'oga', # Ogg Vorbis / Theora 'tta', # True Audio 'wv', # WavPack 'ofr', # OptimFROG ) def __init__(self, config=None, use_lastfm=False, hash_files=False, no_metadata=False, reindex=False, write_every=0): self.config = config self.use_lastfm = use_lastfm self.hash_files = hash_files self.no_metadata = no_metadata self.reindex = reindex self.write_every = write_every self.empty_db = reindex # If we are going to have only 1 track in cache at any time we might as # well just ignore it completely. use_cache = (write_every != 1) self.cache = CacheManager(ram_cache=use_cache, use_db=not use_cache) self.session = db.session self.media_dirs = config.get('MEDIA_DIRS', []) self.allowed_extensions = app.config.get('ALLOWED_FILE_EXTENSIONS', self.VALID_FILE_EXTENSIONS) self._ext = None self._meta = None self.track_count = 0 self.skipped_tracks = 0 self.count_by_extension = {} for extension in self.allowed_extensions: self.count_by_extension[extension] = 0 if self.use_lastfm: self.lastfm = LastFM(api_key=config['LASTFM_API_KEY'], use_cache=(write_every > 1)) if not len(self.media_dirs): log.error("Remember to set the MEDIA_DIRS option, otherwise I " "don't know where to look for.") if reindex: log.info('Dropping database...') confirmed = raw_input('This will destroy all the information. ' 'Proceed? [y/N] ') in ('y', 'Y') if not confirmed: log.error('Aborting.') sys.exit(1) db.drop_all() log.info('Recreating database...') db.create_all() # This is useful to know if the DB is empty, and avoid some checks if not self.reindex: try: m.Track.query.limit(1).all() except OperationalError: self.empty_db = True def get_artist(self, name): name = name.strip() if type(name) in (str, unicode) else None if not name: return None artist = self.cache.get_artist(name) if artist: return artist artist = m.Artist(name=name, image=self.get_artist_image(name)) self.session.add(artist) self.cache.add_artist(artist) return artist def get_artist_image(self, name): if self.use_lastfm: return self.lastfm.get_artist_image(name) return None def get_album(self, name, artist): name = name.strip() if type(name) in (str, unicode) else None if not name or not artist: return None album = self.cache.get_album(name, artist) if album: return album release_year = self.get_release_year(name, artist) cover = self.get_album_cover(name, artist) album = m.Album(name=name, year=release_year, cover=cover) self.session.add(album) self.cache.add_album(album, artist) return album def get_album_cover(self, album, artist): if self.use_lastfm: return self.lastfm.get_album_cover(album, artist) return None def get_release_year(self, album, artist): if self.use_lastfm: rdate = self.lastfm.get_release_date(album, artist) return rdate.year if rdate else None return self.get_metadata_reader().release_year def add_to_session(self, track): self.session.add(track) ext = self.get_extension() self.count_by_extension[ext] += 1 log.info('[ OK ] %s' % track.path) return True def skip(self, reason=None, print_traceback=None): self.skipped_tracks += 1 if log.getEffectiveLevel() <= logging.INFO: _reason = ' (%s)' % reason if reason else '' log.info('[ SKIPPED ] %s%s' % (self.file_path, _reason)) if print_traceback: log.info(traceback.format_exc()) return True def commit(self, force=False): if not force: if not self.write_every: return False if self.track_count % self.write_every != 0: return False log.debug('Writing to database...') self.session.commit() if self.write_every > 1: log.debug('Clearing cache') self.cache.clear() if self.use_lastfm: self.lastfm.clear_cache() return True def save_track(self): """ Takes a path to a track, reads its metadata and stores everything in the database. """ try: full_path = self.file_path.decode('utf-8') except UnicodeDecodeError: self.skip('Unrecognized encoding', print_traceback=True) # If file name is in an strange encoding ignore it. return False try: track = m.Track(full_path, no_metadata=self.no_metadata, hash_file=self.hash_files) except MetadataManagerReadError: self.skip('Corrupted file', print_traceback=True) # If the metadata manager can't read the file, it's probably not an # actual music file, or it's corrupted. Ignore it. return False if not self.empty_db: if q(m.Track).filter_by(path=full_path).count(): self.skip() return True if self.hash_files: if self.cache.hash_exists(track.hash): self.skip('Duplicated file') return True if self.no_metadata: self.add_to_session(track) return True meta = self.set_metadata_reader(track) artist = self.get_artist(meta.artist) album = self.get_album(meta.album, artist) if album and artist: if artist not in album.artists: album.artists.append(artist) track.album = album track.artist = artist self.add_to_session(track) self.cache.add_hash(track.hash) self.commit() def get_metadata_reader(self): return self._meta def set_metadata_reader(self, track): self._meta = track.get_metadata_reader() return self._meta def get_extension(self): return self.file_path.rsplit('.', 1)[1].lower() def is_track(self): """Try to guess whether the file is a valid track or not.""" if not os.path.isfile(self.file_path): return False if '.' not in self.file_path: return False ext = self.get_extension() if ext not in self.VALID_FILE_EXTENSIONS: log.debug('[ SKIPPED ] %s (Unrecognized extension)' % self.file_path) return False elif ext not in self.allowed_extensions: log.debug('[ SKIPPED ] %s (Ignored extension)' % self.file_path) return False return True def walk(self, target, exclude=tuple()): """Recursively walks through a directory looking for tracks.""" if not os.path.isdir(target): return False for root, dirs, files in os.walk(target, exclude): for name in files: self.file_path = os.path.join(root, name) if root in exclude: log.debug('[ EXCLUDED ] %s' % self.file_path) else: if self.is_track(): self.track_count += 1 self.save_track() def _make_unique(self, model): """ Retrieves all repeated slugs for a given model and appends the instance's primary key to it. """ slugs = q(model).group_by(model.slug).\ having(func.count(model.slug) > 1) for row in slugs: slug = row.slug for instance in q(model).filter_by(slug=row.slug): instance.slug += '-%s' % instance.pk return slugs # SELECT pk, slug, COUNT(*) FROM tracks GROUP BY slug HAVING COUNT(*) > 1; def make_slugs_unique(self): query = self._make_unique(m.Artist) self.session.add_all(query) query = self._make_unique(m.Track) self.session.add_all(query) self.session.commit() def print_stats(self): if self.track_count == 0: log.info('\nNo track indexed.\n') return True elapsed_time = self.final_time - self.initial_time log.info('\nRun in %d seconds. Avg %.3fs/track.' % ( elapsed_time, (elapsed_time / self.track_count))) log.info('Found %d tracks. Skipped: %d. Indexed: %d.' % ( self.track_count, self.skipped_tracks, (self.track_count - self.skipped_tracks))) for extension, count in self.count_by_extension.iteritems(): if count: log.info('%s: %d tracks' % (extension, count)) def run(self): self.initial_time = time() for mobject in self.media_dirs: for mdir in mobject.get_valid_dirs(): self.walk(mdir, exclude=mobject.get_excluded_dirs()) self.final_time = time()