def features(self, name, read_from_cache=True, save_to_cache=True, **kwargs):
    """Computes features for this track using the named feature extractor.

    @param name: name of a registered FeatureExtractor
    @param read_from_cache: if True, return cached features when available
    @param save_to_cache: if True, write freshly computed features back to
        the on-disk feature cache
    @param kwargs: passed through to the extractor's extract_features
    @return: the computed (or cached) feature array(s)
    @raise ValueError: if no FeatureExtractor named <name> is found"""
    extractor = FeatureExtractor.query.filter_by(name=unicode(name)).first()
    if not extractor:
        raise ValueError('No feature extractor named %s' % name)
    if read_from_cache:
        featfile = None
        try:
            featfile = self._open_feature_cache(mode='r')
            if featfile.has_features(extractor, kwargs):
                return featfile.get_features(extractor, kwargs)
        except Exception:
            # A missing/corrupt cache is not fatal -- fall through and
            # recompute.  Was a bare "except:", which also swallowed
            # KeyboardInterrupt/SystemExit; narrowed to Exception.
            log.debug('Error reading cached features from %s',
                      self.fn_feature)
        finally:
            if featfile:
                featfile.close()
    Y = extractor.extract_features(self, **kwargs)
    if save_to_cache:
        if not os.path.exists(self.fn_feature):
            from gordon import make_subdirs
            make_subdirs(self.fn_feature)
        featfile = self._open_feature_cache(mode='a')
        featfile.set_features(extractor, Y, kwargs=kwargs)
        featfile.close()
    return Y
def do_ftp(site="ftp://ftp.musicbrainz.org/pub/musicbrainz/data/fullexport", force=False): """Imports fresh database files to DEF_GORDON_DIR/mbrainz/mbdump/latest""" import ftplib ftp = ftplib.FTP("ftp.musicbrainz.org") ftp.login("anonymous", "") ftp.cwd("pub/musicbrainz/data/fullexport") for f in ftp.nlst(): if f.startswith("latest-is"): dr = f[10:] print "Latest is", dr testdir = os.path.join(config.DEF_GORDON_DIR, "mbrainz", "mbdump", dr) if os.path.exists(testdir): print "We already have this dump... skipping. Set force=True to force download" if not force: ftp.close() return # handle for our writing def _ftp_handle(block): fout.write(block) print ".", # we should be in the right directory now ftp.cwd(dr) for f in ftp.nlst(): f = f.strip() # Open the file for writing in binary mode fnout = os.path.join(config.DEF_GORDON_DIR, "mbrainz", "mbdump", dr, f) make_subdirs(fnout) print "Opening local file " + fnout fout = open(fnout, "wb") print "downloading", f ftp.retrbinary("RETR " + f, _ftp_handle) print "Done downloading", f fout.close() ftp.close() currdir = os.getcwd() # create symlink from latest to our new dump os.chdir("%s/mbrainz/mbdump" % config.DEF_GORDON_DIR) try: os.system("rm latest") except: pass os.system("ln -s %s latest" % dr) os.chdir(currdir)
def do_ftp(site='ftp://ftp.musicbrainz.org/pub/musicbrainz/data/fullexport',force=False) : """Imports fresh database files to DEF_GORDON_DIR/mbrainz/mbdump/latest""" import ftplib ftp=ftplib.FTP('ftp.musicbrainz.org') ftp.login('anonymous','') ftp.cwd('pub/musicbrainz/data/fullexport') for f in ftp.nlst() : if f.startswith('latest-is') : dr=f[10:] print 'Latest is',dr testdir=os.path.join(config.DEF_GORDON_DIR,'mbrainz','mbdump',dr) if os.path.exists(testdir) : print 'We already have this dump... skipping. Set force=True to force download' if not force : ftp.close() return #handle for our writing def _ftp_handle(block) : fout.write(block) print ".", #we should be in the right directory now ftp.cwd(dr) for f in ftp.nlst() : f=f.strip() # Open the file for writing in binary mode fnout=os.path.join(config.DEF_GORDON_DIR,'mbrainz','mbdump',dr,f) make_subdirs(fnout) print 'Opening local file ' + fnout fout= open(fnout, 'wb') print 'downloading',f ftp.retrbinary('RETR ' + f, _ftp_handle) print 'Done downloading',f fout.close() ftp.close() currdir=os.getcwd() #create symlink from latest to our new dump os.chdir('%s/mbrainz/mbdump' % config.DEF_GORDON_DIR) try : os.system('rm latest') except : pass os.system('ln -s %s latest' % dr) os.chdir(currdir)
def features(self, name, read_from_cache=True, save_to_cache=True, **kwargs):
    """Computes features for this track using the named feature extractor.

    Cached features are returned when available (read_from_cache) and
    freshly computed features are written back (save_to_cache).

    @param kwargs: forwarded to the extractor's extract_features
    @return: the feature array(s) for this track
    @raise ValueError: if no FeatureExtractor named <name> is found"""
    extractor = FeatureExtractor.query.filter_by(
        name=unicode(name)).first()
    if extractor is None:
        raise ValueError('No feature extractor named %s' % name)
    if read_from_cache:
        featfile = None
        try:
            featfile = self._open_feature_cache(mode='r')
            if featfile.has_features(extractor, kwargs):
                return featfile.get_features(extractor, kwargs)
        except Exception:
            # Fixed: was a bare "except:" that also caught
            # KeyboardInterrupt/SystemExit.  An unreadable cache just
            # means we recompute below.
            log.debug('Error reading cached features from %s',
                      self.fn_feature)
        finally:
            if featfile:
                featfile.close()
    Y = extractor.extract_features(self, **kwargs)
    if save_to_cache:
        if not os.path.exists(self.fn_feature):
            from gordon import make_subdirs
            make_subdirs(self.fn_feature)
        featfile = self._open_feature_cache(mode='a')
        featfile.set_features(extractor, Y, kwargs=kwargs)
        featfile.close()
    return Y
def register(name, module_path, copy_module_tree=False):
    """Register a new feature extractor with gordon.

    The feature extractor should live in the module specified by
    <module_path>.  The module must contain a method called
    extract_features which takes a track (and any other optional keyword
    arguments) and returns an array (or tuple of arrays) [1] of feature
    values.  This function's docstring is stored in the
    FeatureExtractor.description column.

    The module will be archived with gordon and reloaded whenever
    FeatureExtractor.extract_features is called.

    If copy_module_tree == True, the contents of the module's parent
    directory will also be archived to allow any external dependencies
    (e.g. libraries such as mlabwrap and/or matlab code) to be stored
    alongside the module.  Dependencies that require re-compilation on
    different architectures should probably be re-compiled whenever the
    parent module is imported.

    [1] Feature caching only works for arrays, or tuples of arrays.
    """
    name = unicode(name)
    if not name:
        raise ValueError('Invalid name: %s' % name)
    if FeatureExtractor.query.filter_by(name=name).count() > 0:
        # there's an index to avoid this anyway
        raise ValueError('A FeatureExtractor named "%s" already exists' % name)
    module = FeatureExtractor._load_module_from_path(module_path)
    # hasattr is the idiomatic form of the original
    # "'extract_features' in dir(module)" check.
    if not hasattr(module, 'extract_features'):
        raise ValueError('Feature extractor module must include a '
                         'function called extract_features')
    description = unicode(module.extract_features.__doc__)
    featext = FeatureExtractor(name=name, description=description)
    # The new FeatureExtractor needs to be committed to the database in
    # order to get its id, which we need before we can copy files.
    commit()
    try:
        featext.module_path = unicode(os.path.join(_get_filedir(featext.id),
                                                   str(featext.id),
                                                   'feature_extractor.py'))
        module_dir = os.path.dirname(os.path.abspath(module_path))
        target_module_dir = os.path.dirname(featext.module_fullpath)
        from gordon import make_subdirs
        make_subdirs(os.path.dirname(featext.module_fullpath))
        if copy_module_tree:
            # Archive the whole parent directory so external deps travel
            # with the module.
            shutil.copytree(module_dir, target_module_dir)
        else:
            make_subdirs(featext.module_fullpath)
            shutil.copy(module_path, target_module_dir)
        # Rename the module file to the canonical name.
        module_filename = os.path.basename(module_path)
        shutil.move(os.path.join(target_module_dir, module_filename),
                    featext.module_fullpath)
    except:
        # Deliberately bare: roll back the half-registered row on ANY
        # failure (including KeyboardInterrupt), then re-raise the
        # original exception.
        session.delete(featext)
        commit()
        raise
    return featext
def register(name, module_path, copy_module_tree=False):
    """Register a new feature extractor with gordon.

    The feature extractor should live in the module specified by
    <module_path>.  The module must contain a method called
    extract_features which takes a track (and any other optional keyword
    arguments) and returns an array (or tuple of arrays) [1] of feature
    values.  This function's docstring is stored in the
    FeatureExtractor.description column.

    The module will be archived with gordon and reloaded whenever
    FeatureExtractor.extract_features is called.

    If copy_module_tree == True, the contents of the module's parent
    directory will also be archived to allow any external dependencies
    (e.g. libraries such as mlabwrap and/or matlab code) to be stored
    alongside the module.  Dependencies that require re-compilation on
    different architectures should probably be re-compiled whenever the
    parent module is imported.

    [1] Feature caching only works for arrays, or tuples of arrays.
    """
    name = unicode(name)
    if not name:
        raise ValueError('Invalid name: %s' % name)
    if FeatureExtractor.query.filter_by(name=name).count() > 0:
        # there's an index to avoid this anyway
        raise ValueError('A FeatureExtractor named "%s" already exists' % name)
    module = FeatureExtractor._load_module_from_path(module_path)
    # Replaced "'extract_features' in dir(module)" with hasattr.
    if not hasattr(module, 'extract_features'):
        raise ValueError('Feature extractor module must include a '
                         'function called extract_features')
    description = unicode(module.extract_features.__doc__)
    featext = FeatureExtractor(name=name, description=description)
    # Commit first so the row gets its id, which determines where the
    # module file will be archived on disk.
    commit()
    try:
        featext.module_path = unicode(
            os.path.join(_get_filedir(featext.id), str(featext.id),
                         'feature_extractor.py'))
        module_dir = os.path.dirname(os.path.abspath(module_path))
        target_module_dir = os.path.dirname(featext.module_fullpath)
        from gordon import make_subdirs
        make_subdirs(os.path.dirname(featext.module_fullpath))
        if copy_module_tree:
            # copy the whole tree so sibling dependencies come along
            shutil.copytree(module_dir, target_module_dir)
        else:
            make_subdirs(featext.module_fullpath)
            shutil.copy(module_path, target_module_dir)
        # Rename the copied module file to the canonical name.
        module_filename = os.path.basename(module_path)
        shutil.move(os.path.join(target_module_dir, module_filename),
                    featext.module_fullpath)
    except:
        # Intentionally bare except: on any failure at all, delete the
        # orphaned FeatureExtractor row and re-raise.
        session.delete(featext)
        commit()
        raise
    return featext