def _translateExtensions(
    self,
    fileExtensions=None,
    expandExtensions=True,
):
    # noinspection PyShadowingNames
    '''
    Resolve the file extensions that a corpus search should use.

    With no extensions given (``None``), every extension stored in
    ``Corpus._allExtensions`` is returned.  Otherwise, when
    `expandExtensions` is True, each requested extension is replaced by
    whatever ``common.findInputExtension`` reports for it; extensions for
    which that helper returns ``None`` are dropped.  When
    `expandExtensions` is False the request is returned as a list,
    otherwise untouched.

    >>> coreCorpus = corpus.corpora.CoreCorpus()
    >>> for extension in coreCorpus._translateExtensions():
    ...     extension
    ...
    '.abc'
    '.capx'
    '.mid'
    '.midi'
    '.xml'
    '.mxl'
    '.musicxml'
    '.md'
    '.musedata'
    '.zip'
    '.krn'
    '.rntxt'
    '.rntext'
    '.romantext'
    '.rtxt'
    '.nwctxt'
    '.nwc'

    >>> coreCorpus._translateExtensions('.mid', False)
    ['.mid']

    >>> coreCorpus._translateExtensions('.mid', True)
    ['.mid', '.midi']

    It does not matter if you choose a canonical name or not, the output is the same:

    >>> coreCorpus._translateExtensions('.musicxml', True)
    ['.xml', '.mxl', '.musicxml']

    >>> coreCorpus._translateExtensions('.xml', True)
    ['.xml', '.mxl', '.musicxml']
    '''
    # Normalise a scalar request into a one-element list.
    if common.isListLike(fileExtensions):
        requested = fileExtensions
    else:
        requested = [fileExtensions]

    # A lone None means "no preference": hand back the full default set.
    if len(requested) == 1 and requested[0] is None:
        return Corpus._allExtensions

    if not expandExtensions:
        return requested

    expanded = []
    for requestedExtension in requested:
        known = common.findInputExtension(requestedExtension)
        if known is not None:
            expanded += known
    return expanded
def getPaths(extList=None):
    '''
    Get all paths in the corpus that match a known extension, or an
    extension provided by an argument.

    `extList` may be a single extension or a list-like of extensions.
    When it is None, the input extensions reported by
    ``common.findInputExtension`` for 'lily', 'musicxml' and 'humdrum'
    are used.  Returns a list of unique paths, in discovery order, whose
    names end with one of the extensions.

    >>> a = getPaths()
    >>> len(a) > 30
    True

    >>> a = getPaths('krn')
    >>> len(a) >= 4
    True
    '''
    if not common.isListLike(extList):
        extList = [extList]
    if extList == [None]:
        extList = (common.findInputExtension('lily')
                   + common.findInputExtension('musicxml')
                   + common.findInputExtension('humdrum'))

    # str.endswith accepts a tuple, so one call tests every extension.
    suffixes = tuple(extList)

    paths = []
    seen = set()  # O(1) duplicate detection; `paths` keeps discovery order
    for moduleName in MODULES:
        if hasattr(moduleName, '__path__'):
            # __path__ is a list with one or more paths; the first is the
            # directory that contains the score files
            dirPath = moduleName.__path__[0]
            dirListing = [os.path.join(dirPath, x)
                          for x in os.listdir(dirPath)]
        else:
            # when importing a package name (a directory) the moduleName
            # may be a list of all paths contained within the package;
            # this seems to be dependent on the context of the call:
            # from the command line is different than from the interpreter
            dirListing = moduleName

        for fp in dirListing:
            if fp in seen:
                continue
            if fp.endswith(suffixes):
                seen.add(fp)
                paths.append(fp)
    return paths
def getPaths(extList=None, expandExtensions=True):
    '''
    Get all paths in the corpus that match a known extension, or an
    extension provided by an argument.

    If `expandExtensions` is True, a format for an extension, and related
    extensions, will be replaced by all known input extensions. This is
    convenient when an input format might match for multiple extensions.
    Extensions for which ``common.findInputExtension`` returns None are
    skipped instead of raising.

    When `extList` is None, the module-level ``_ALL_EXTENSIONS`` is used.
    Returns a list of unique paths, in discovery order.

    >>> a = getPaths()
    >>> len(a) > 30
    True

    >>> a = getPaths('krn')
    >>> len(a) >= 4
    True

    >>> a = getPaths('abc')
    >>> len(a) >= 10
    True
    '''
    if not common.isListLike(extList):
        extList = [extList]
    if extList == [None]:
        extList = _ALL_EXTENSIONS
    elif expandExtensions:
        expanded = []
        for ext in extList:
            known = common.findInputExtension(ext)
            # NOTE(review): findInputExtension presumably returns None for
            # an unrecognised extension; `+= None` would raise TypeError,
            # so such entries are skipped.
            if known is not None:
                expanded += known
        extList = expanded

    # str.endswith accepts a tuple, so one call tests every extension.
    suffixes = tuple(extList)

    paths = []
    seen = set()  # O(1) duplicate detection; `paths` keeps discovery order
    for moduleName in MODULES:
        if hasattr(moduleName, '__path__'):
            # __path__ is a list with one or more paths; the first is the
            # directory that contains the score files
            dirPath = moduleName.__path__[0]
            dirListing = [os.path.join(dirPath, x)
                          for x in os.listdir(dirPath)]
        else:
            # when importing a package name (a directory) the moduleName
            # may be a list of all paths contained within the package;
            # this seems to be dependent on the context of the call:
            # from the command line is different than from the interpreter
            dirListing = moduleName

        for fp in dirListing:
            if fp in seen:
                continue
            if fp.endswith(suffixes):
                seen.add(fp)
                paths.append(fp)
    return paths
def _translateExtensions(
    self,
    fileExtensions=None,
    expandExtensions=True,
):
    '''
    Work out which file extensions a corpus lookup should consider.

    A request of ``None`` yields ``Corpus._allExtensions``.  Any other
    request is either returned as-is (wrapped in a list when it was a
    single value) or, if `expandExtensions` is True, widened so that each
    extension is replaced by the result of ``common.findInputExtension``
    for it.  Extensions that helper maps to ``None`` are left out.

    >>> from music21 import corpus
    >>> coreCorpus = corpus.CoreCorpus()
    >>> for extension in coreCorpus._translateExtensions():
    ...     extension
    ...
    '.abc'
    '.capx'
    '.mid'
    '.midi'
    '.xml'
    '.mxl'
    '.mx'
    '.musicxml'
    '.md'
    '.musedata'
    '.zip'
    '.krn'
    '.rntxt'
    '.rntext'
    '.romantext'
    '.rtxt'
    '.nwctxt'
    '.nwc'

    >>> coreCorpus._translateExtensions('.mid', False)
    ['.mid']

    >>> coreCorpus._translateExtensions('.mid', True)
    ['.mid', '.midi']
    '''
    wanted = fileExtensions
    if not common.isListLike(wanted):
        wanted = [wanted]

    # Only the exact list [None] selects the defaults.
    if wanted == [None]:
        return Corpus._allExtensions

    if not expandExtensions:
        return wanted

    widened = []
    for candidate in wanted:
        related = common.findInputExtension(candidate)
        if related is None:
            continue
        widened += related
    return widened
def getVirtualPaths(extList=None):
    '''
    Get all paths in the virtual corpus that match a known extension.

    `extList` may be a single extension or a list-like of extensions.
    An extension of None selects the input extensions reported by
    ``common.findInputExtension`` for 'lily', 'musicxml' and 'humdrum'.

    Entries of ``VIRTUAL`` whose `corpusPath` is None are ignored.  For
    the others, the values returned by ``getUrlByExt`` for each extension
    are collected.  Returns them as a list without duplicates, in
    discovery order.

    >>> len(getVirtualPaths()) > 6
    True
    '''
    if not common.isListLike(extList):
        extList = [extList]
    if extList == [None]:
        extList = (common.findInputExtension('lily')
                   + common.findInputExtension('musicxml')
                   + common.findInputExtension('humdrum'))

    paths = []
    for obj in VIRTUAL:
        # identity comparison is the correct test against None
        if obj.corpusPath is None:
            continue
        for ext in extList:
            for part in obj.getUrlByExt(ext):
                if part not in paths:
                    paths.append(part)
    return paths
class Corpus(prebase.ProtoM21Object):
    r'''
    Abstract base class of all corpora subclasses.

    Subclasses must provide `name`, `cacheFilePath` and `getPaths`.
    '''

    # CLASS VARIABLES #

    __metaclass__ = abc.ABCMeta

    # TODO: this is volatile -- should be elsewhere...
    _acceptableExtensions = [
        'abc', 'capella', 'midi', 'musicxml', 'musedata',
        'humdrum', 'romantext', 'noteworthytext', 'noteworthy'
    ]

    # every input extension of every acceptable format, flattened
    _allExtensions = tuple(
        common.flattenList(
            [common.findInputExtension(x) for x in _acceptableExtensions]))

    # shared by all Corpus instances; keys are indexable and key[0] is
    # compared (as a string) against a name in _removeNameFromCache
    _pathsCache = {}

    _directoryInformation = ()  # a tuple of triples -- see coreCorpus

    parseUsingCorpus = True

    # SPECIAL METHODS #

    def _reprInternal(self):
        return ''

    # PRIVATE METHODS #

    def _removeNameFromCache(self, name):
        '''
        Delete every entry of the shared paths cache whose key's first
        element, converted to a string, equals `name`.
        '''
        # collect first, delete afterwards: never mutate while iterating
        keysToRemove = [key for key in list(Corpus._pathsCache)
                        if str(key[0]) == name]
        for key in keysToRemove:
            del Corpus._pathsCache[key]

    def _findPaths(self, rootDirectoryPath, fileExtensions):
        '''
        Given a root filePath file path, recursively search all contained
        paths for files in `rootFilePath` matching any of the file
        extensions in `fileExtensions`.

        The `fileExtensions` is a list of file extensions.

        Entries whose name starts with ``'__'`` or ``'.'`` are skipped.
        Returns the matching paths (as yielded by ``rglob``) in sorted
        order.

        NB: we've tried optimizing with `fnmatch` but it does not save any
        time.
        '''
        rdp = common.cleanpath(rootDirectoryPath, returnPathlib=True)
        matched = []

        for filename in sorted(rdp.rglob('*')):
            if filename.name.startswith(('__', '.')):
                continue
            for extension in fileExtensions:
                if filename.suffix.endswith(extension):
                    matched.append(filename)
                    break
        return matched

    def _translateExtensions(
        self,
        fileExtensions=None,
        expandExtensions=True,
    ):
        '''
        Utility to get default extensions, or, optionally, expand extensions
        to all known formats.

        >>> coreCorpus = corpus.corpora.CoreCorpus()
        >>> for extension in coreCorpus._translateExtensions():
        ...     extension
        ...
        '.abc'
        '.capx'
        '.mid'
        '.midi'
        '.xml'
        '.mxl'
        '.mx'
        '.musicxml'
        '.md'
        '.musedata'
        '.zip'
        '.krn'
        '.rntxt'
        '.rntext'
        '.romantext'
        '.rtxt'
        '.nwctxt'
        '.nwc'

        >>> coreCorpus._translateExtensions('.mid', False)
        ['.mid']

        >>> coreCorpus._translateExtensions('.mid', True)
        ['.mid', '.midi']
        '''
        if not common.isListLike(fileExtensions):
            fileExtensions = [fileExtensions]
        if len(fileExtensions) == 1 and fileExtensions[0] is None:
            fileExtensions = Corpus._allExtensions
        elif expandExtensions:
            expandedExtensions = []
            for extension in fileExtensions:
                allInputExtensions = common.findInputExtension(extension)
                # extensions the helper maps to None are dropped
                if allInputExtensions is not None:
                    expandedExtensions += allInputExtensions
            return expandedExtensions
        return fileExtensions

    # PRIVATE PROPERTIES #

    @property
    @abc.abstractmethod
    def cacheFilePath(self):
        raise NotImplementedError

    # PUBLIC METHODS #

    def rebuildMetadataCache(self, useMultiprocessing=True, verbose=True):
        r'''
        Rebuild a named bundle from scratch.

        If a bundle is associated with one of music21's corpora, delete any
        metadata cache on disk, clear the bundle's contents and reload in all
        files from that associated corpus.

        `useMultiprocessing` and `verbose` are forwarded to
        :meth:`cacheMetadata`.

        Return the rebuilt metadata bundle.  If there is no metadata
        bundle or no cache file path, nothing is rebuilt and the corpus
        itself is returned.
        '''
        mdb = self.metadataBundle
        if mdb is None:
            return self
        if self.cacheFilePath is None:
            return self

        mdb.clear()
        mdb.delete()
        # honour the caller's verbosity instead of always forcing True
        self.cacheMetadata(useMultiprocessing=useMultiprocessing,
                           verbose=verbose)
        return self.metadataBundle

    def cacheMetadata(self, useMultiprocessing=True, verbose=True, timer=None):
        '''
        Cache the metadata for a single corpus.

        Progress messages go to ``environLocal.warn`` when `verbose` is
        True and to ``environLocal.printDebug`` otherwise.  If no `timer`
        is given, a ``common.Timer`` is created and started.

        Returns whatever ``metadataBundle.addFromPaths`` returns (stored
        here as the failing file paths).
        '''
        def update(message):
            if verbose is True:
                environLocal.warn(message)
            else:
                environLocal.printDebug(message)

        if timer is None:
            timer = common.Timer()
            timer.start()

        metadataBundle = self.metadataBundle
        paths = self.getPaths()

        update('{} metadata cache: starting processing of paths: {}'.format(
            self.name, len(paths)))
        update('cache: filename: {0}'.format(metadataBundle.filePath))

        failingFilePaths = metadataBundle.addFromPaths(
            paths,
            parseUsingCorpus=self.parseUsingCorpus,
            useMultiprocessing=useMultiprocessing,
            verbose=verbose)

        update('cache: writing time: {0} md items: {1}\n'.format(
            timer, len(metadataBundle)))
        update('cache: filename: {0}'.format(metadataBundle.filePath))

        del metadataBundle
        return failingFilePaths

    @abc.abstractmethod
    def getPaths(self, fileExtensions=None, expandExtensions=True):
        r'''
        The paths of the files in a given corpus.
        '''
        raise NotImplementedError

    def getWorkList(
        self,
        workName,
        movementNumber=None,
        fileExtensions=None,
    ):
        r'''
        Search the corpus and return a list of filenames of works, always in a
        list.

        If no matches are found, an empty list is returned.

        >>> from music21 import corpus
        >>> coreCorpus = corpus.corpora.CoreCorpus()

        # returns 1 even though there is a '.mus' file, which cannot be read...

        >>> len(coreCorpus.getWorkList('cpebach/h186'))
        1
        >>> len(coreCorpus.getWorkList('cpebach/h186', None, '.xml'))
        1

        >>> len(coreCorpus.getWorkList('schumann_clara/opus17', 3))
        1
        >>> len(coreCorpus.getWorkList('schumann_clara/opus17', 2))
        0

        Make sure that 'verdi' just gets the single Verdi piece and not the
        Monteverdi pieces:

        >>> len(coreCorpus.getWorkList('verdi'))
        1
        '''
        if not common.isListLike(fileExtensions):
            fileExtensions = [fileExtensions]
        paths = self.getPaths(fileExtensions)
        results = []

        workPath = pathlib.PurePath(workName)
        workPosix = workPath.as_posix().lower()
        # find all matches for the work name
        # TODO: this should match by path component, not just
        # substring
        for path in paths:
            if workPosix in path.as_posix().lower():
                results.append(path)

        if results:
            # more than one matched...use more stringent criterion:
            # must have a slash before the name
            previousResults = results
            results = []
            for path in previousResults:
                if '/' + workPosix in path.as_posix().lower():
                    results.append(path)
            # the stricter test eliminated everything: fall back
            if not results:
                results = previousResults

        movementResults = []
        if movementNumber is not None and results:
            # store one or more possible mappings of movement number
            movementStrList = []
            # see if this is a pair
            if common.isIterable(movementNumber):
                movementStrList.append(''.join(str(x) for x in movementNumber))
                movementStrList.append('-'.join(
                    str(x) for x in movementNumber))
                movementStrList.append('movement' + '-'.join(
                    str(x) for x in movementNumber))
                movementStrList.append('movement' + '-0'.join(
                    str(x) for x in movementNumber))
            else:
                movementStrList += [
                    '0{0}'.format(movementNumber),
                    str(movementNumber),
                    'movement{0}'.format(movementNumber),
                ]
            for filePath in sorted(results):
                filename = filePath.name
                if filePath.suffix:
                    filenameWithoutExtension = filePath.stem
                else:
                    filenameWithoutExtension = None
                searchPartialMatch = True
                if filenameWithoutExtension is not None:
                    # look for direct matches first
                    for movementStr in movementStrList:
                        if (filenameWithoutExtension.lower()
                                == movementStr.lower()):
                            movementResults.append(filePath)
                            searchPartialMatch = False
                # if we have one direct match, all other matches must
                # be direct. this will match multiple files with different
                # file extensions
                if movementResults:
                    continue
                if searchPartialMatch:
                    for movementStr in movementStrList:
                        if filename.startswith(movementStr.lower()):
                            movementResults.append(filePath)
        else:
            movementResults = results
        return sorted(set(movementResults))

    def search(self, query, field=None, fileExtensions=None, **kwargs):
        r'''
        Search this corpus for metadata entries, returning a metadataBundle

        >>> corpus.corpora.CoreCorpus().search('3/4')
        <music21.metadata.bundles.MetadataBundle {1876 entries}>

        >>> corpus.corpora.CoreCorpus().search(
        ...      'bach',
        ...      field='composer',
        ...      )
        <music21.metadata.bundles.MetadataBundle {363 entries}>

        >>> predicate = lambda noteCount: noteCount < 20
        >>> corpus.corpora.CoreCorpus().search(
        ...     predicate,
        ...     field='noteCount',
        ...     )
        <music21.metadata.bundles.MetadataBundle {134 entries}>
        '''
        return self.metadataBundle.search(query,
                                          field=field,
                                          fileExtensions=fileExtensions,
                                          **kwargs)

    # PUBLIC PROPERTIES #

    @property
    def directoryInformation(self):
        '''
        Returns a tuple of DirectoryInformation objects for a
        each directory in self._directoryInformation.

        >>> core = corpus.corpora.CoreCorpus()
        >>> diBrief = core.directoryInformation[0:5]
        >>> diBrief
        (<music21.corpus.work.DirectoryInformation airdsAirs>,
         <music21.corpus.work.DirectoryInformation bach>,
         <music21.corpus.work.DirectoryInformation beach>,
         <music21.corpus.work.DirectoryInformation beethoven>,
         <music21.corpus.work.DirectoryInformation chopin>)
        >>> diBrief[2].directoryTitle
        'Amy Beach'
        '''
        dirInfo = []
        for infoTriple in self._directoryInformation:
            dirInfo.append(
                work.DirectoryInformation(*infoTriple, corpusObject=self))
        return tuple(dirInfo)

    @property
    @abc.abstractmethod
    def name(self):
        r'''
        The name of a given corpus.
        '''
        raise NotImplementedError

    @property
    def metadataBundle(self):
        r'''
        The metadata bundle for a corpus:

        >>> from music21 import corpus
        >>> corpus.corpora.CoreCorpus().metadataBundle
        <music21.metadata.bundles.MetadataBundle 'core': {151... entries}>

        As a technical aside, the metadata bundle for a corpus is actually
        stored in corpus.manager, in order to cache most effectively over
        multiple calls. There might be good reasons to eventually move them
        to each Corpus object, so long as its cached across instances of the
        class.
        '''
        # imported here rather than at module level
        from music21.corpus import manager
        mdb = manager.getMetadataBundleByCorpus(self)
        mdb.corpus = self
        return mdb

    def all(self):
        '''
        This is a synonym for the metadataBundle property, but easier to
        understand what it does.

        >>> from music21 import corpus
        >>> corpus.corpora.CoreCorpus().all()
        <music21.metadata.bundles.MetadataBundle 'core': {151... entries}>
        '''
        return self.metadataBundle

    def getComposer(
        self,
        composerName,
        fileExtensions=None,
    ):
        '''
        Return all filenames in the corpus that match a composer's or a
        collection's name. An `fileExtensions`, if provided, defines which
        extensions are returned. An `fileExtensions` of None (default) returns
        all extensions.

        Note that xml and mxl are treated equivalently.

        >>> from music21 import corpus
        >>> coreCorpus = corpus.corpora.CoreCorpus()
        >>> a = coreCorpus.getComposer('bach')
        >>> len(a) > 100
        True

        >>> a = coreCorpus.getComposer('bach', 'krn')
        >>> len(a) < 10
        True

        >>> a = coreCorpus.getComposer('bach', 'xml')
        >>> len(a) > 10
        True
        '''
        paths = self.getPaths(fileExtensions)
        results = []
        for path in paths:
            # iterate through path components; cannot match entire string
            # composer name may be at any level
            stubs = path.parts
            for stub in stubs:
                # need to remove extension if found
                if composerName.lower() == stub.lower():
                    results.append(path)
                    break
                # get all but the last dot group
                # this is done for file names that function like composer names
                elif '.' in stub:
                    newStub = '.'.join(stub.split('.')[:-1]).lower()
                    if newStub == composerName.lower():
                        results.append(path)
                        break
        results.sort()
        return results

    def getWorkReferences(self):
        '''
        Return a data dictionary for all works in this corpus
        Returns a list of corpus.work.DirectoryInformation objects, one
        for each directory. A 'works' dictionary for each composer
        provides references to dictionaries for all associated works.

        This is used in the generation of corpus documentation

        >>> workRefs = corpus.corpora.CoreCorpus().getWorkReferences()
        >>> workRefs[1:3]
        [<music21.corpus.work.DirectoryInformation bach>,
         <music21.corpus.work.DirectoryInformation beach>]
        '''
        return list(self.directoryInformation)
class Corpus(object):
    r'''
    Abstract base class of all corpora subclasses.

    Subclasses must provide `name`, `cacheName` and `getPaths`.
    '''

    ### CLASS VARIABLES ###

    __metaclass__ = abc.ABCMeta

    # every input extension of every supported format, concatenated
    _allExtensions = (common.findInputExtension('abc')
                      + common.findInputExtension('capella')
                      + common.findInputExtension('midi')
                      + common.findInputExtension('musicxml')
                      + common.findInputExtension('musedata')
                      + common.findInputExtension('humdrum')
                      + common.findInputExtension('romantext')
                      + common.findInputExtension('noteworthytext')
                      + common.findInputExtension('noteworthy'))

    # shared by all Corpus instances; keys are indexable and key[0] is
    # compared against a name in _removeNameFromCache
    _pathsCache = {}

    _directoryInformation = ()  # a tuple of triples -- see coreCorpus

    ### SPECIAL METHODS ###

    def __repr__(self):
        return '<{0}.{1}>'.format(
            self.__class__.__module__,
            self.__class__.__name__,
        )

    ### PRIVATE METHODS ###

    def _removeNameFromCache(self, name):
        '''
        Delete every entry of the shared paths cache whose key's first
        element equals `name`.
        '''
        # Iterate over a snapshot of the keys: deleting from a dict while
        # iterating its live key view raises RuntimeError on Python 3.
        for key in list(Corpus._pathsCache):
            if key[0] == name:
                del Corpus._pathsCache[key]

    def _findPaths(self, rootDirectoryPath, fileExtensions):
        '''
        Given a root filePath file path, recursively search all contained
        paths for files in `rootFilePath` matching any of the file
        extensions in `fileExtensions`.

        The `fileExtensions` is a list of file extensions.

        ``.svn`` directories are not descended into and files whose name
        starts with ``'.'`` are skipped.  Returns a list of joined path
        strings.  Raises ``corpus.CorpusException`` if testing a filename
        triggers a ``UnicodeDecodeError``.

        NB: we've tried optimizing with `fnmatch` but it does not save any
        time.
        '''
        from music21 import corpus
        matched = []
        if six.PY2:
            rootDirectoryPath = six.u(rootDirectoryPath)

        for rootDirectory, directoryNames, filenames in os.walk(
                rootDirectoryPath):
            # pruning in place stops os.walk from entering .svn
            if '.svn' in directoryNames:
                directoryNames.remove('.svn')
            for filename in filenames:
                try:
                    if filename.startswith('.'):
                        continue
                except UnicodeDecodeError as error:
                    raise corpus.CorpusException(
                        'Incorrect filename in corpus path: {0}: {1!r}'.format(
                            filename, error))
                for extension in fileExtensions:
                    if filename.endswith(extension):
                        matched.append(os.path.join(rootDirectory, filename))
                        break
        return matched

    def _translateExtensions(
        self,
        fileExtensions=None,
        expandExtensions=True,
    ):
        '''
        Utility to get default extensions, or, optionally, expand extensions
        to all known formats.

        >>> coreCorpus = corpus.corpora.CoreCorpus()
        >>> for extension in coreCorpus._translateExtensions():
        ...     extension
        ...
        '.abc'
        '.capx'
        '.mid'
        '.midi'
        '.xml'
        '.mxl'
        '.mx'
        '.musicxml'
        '.md'
        '.musedata'
        '.zip'
        '.krn'
        '.rntxt'
        '.rntext'
        '.romantext'
        '.rtxt'
        '.nwctxt'
        '.nwc'

        >>> coreCorpus._translateExtensions('.mid', False)
        ['.mid']

        >>> coreCorpus._translateExtensions('.mid', True)
        ['.mid', '.midi']
        '''
        if not common.isListLike(fileExtensions):
            fileExtensions = [fileExtensions]
        if len(fileExtensions) == 1 and fileExtensions[0] is None:
            fileExtensions = Corpus._allExtensions
        elif expandExtensions:
            expandedExtensions = []
            for extension in fileExtensions:
                allInputExtensions = common.findInputExtension(extension)
                # extensions the helper maps to None are dropped
                if allInputExtensions is not None:
                    expandedExtensions += allInputExtensions
            return expandedExtensions
        return fileExtensions

    ### PRIVATE PROPERTIES ###

    @abc.abstractproperty
    def cacheName(self):
        raise NotImplementedError

    ### PUBLIC METHODS ###

    @abc.abstractmethod
    def getPaths(self, fileExtensions=None, expandExtensions=True):
        r'''
        The paths of the files in a given corpus.
        '''
        raise NotImplementedError

    def getWorkList(
        self,
        workName,
        movementNumber=None,
        fileExtensions=None,
    ):
        r'''
        Search the corpus and return a list of filenames of works, always in a
        list.

        If no matches are found, an empty list is returned.

        >>> from music21 import corpus
        >>> coreCorpus = corpus.corpora.CoreCorpus()

        # returns 1 even though there is a '.mus' file, which cannot be read...

        >>> len(coreCorpus.getWorkList('cpebach/h186'))
        1
        >>> len(coreCorpus.getWorkList('cpebach/h186', None, '.xml'))
        1

        >>> len(coreCorpus.getWorkList('schumann_clara/opus17', 3))
        1
        >>> len(coreCorpus.getWorkList('schumann_clara/opus17', 2))
        0

        Make sure that 'verdi' just gets the single Verdi piece and not the
        Monteverdi pieces:

        >>> len(coreCorpus.getWorkList('verdi'))
        1
        '''
        if not common.isListLike(fileExtensions):
            fileExtensions = [fileExtensions]
        paths = self.getPaths(fileExtensions)
        results = []
        # permit workName to be a list of paths/branches
        if common.isIterable(workName):
            workName = os.path.sep.join(workName)
        workSlashes = workName.replace('/', os.path.sep)
        # find all matches for the work name
        # TODO: this should match by path component, not just
        # substring
        for path in paths:
            if workName.lower() in path.lower():
                results.append(path)
            elif workSlashes.lower() in path.lower():
                results.append(path)
        if results:
            # more than one matched...use more stringent criterion:
            # must have a slash before the name
            previousResults = results
            results = []
            longName = os.sep + workSlashes.lower()
            for path in previousResults:
                if longName in path.lower():
                    results.append(path)
            # the stricter test eliminated everything: fall back
            if not results:
                results = previousResults
        movementResults = []
        if movementNumber is not None and results:
            # store one or more possible mappings of movement number
            movementStrList = []
            # see if this is a pair
            if common.isIterable(movementNumber):
                movementStrList.append(''.join(str(x) for x in movementNumber))
                movementStrList.append('-'.join(
                    str(x) for x in movementNumber))
                movementStrList.append('movement' + '-'.join(
                    str(x) for x in movementNumber))
                movementStrList.append('movement' + '-0'.join(
                    str(x) for x in movementNumber))
            else:
                movementStrList += [
                    '0{0}'.format(movementNumber),
                    str(movementNumber),
                    'movement{0}'.format(movementNumber),
                ]
            for filePath in sorted(results):
                filename = os.path.split(filePath)[1]
                if '.' in filename:
                    filenameWithoutExtension = os.path.splitext(filename)[0]
                else:
                    filenameWithoutExtension = None
                searchPartialMatch = True
                if filenameWithoutExtension is not None:
                    # look for direct matches first
                    for movementStr in movementStrList:
                        if (filenameWithoutExtension.lower()
                                == movementStr.lower()):
                            movementResults.append(filePath)
                            searchPartialMatch = False
                # if we have one direct match, all other matches must
                # be direct. this will match multiple files with different
                # file extensions
                if movementResults:
                    continue
                if searchPartialMatch:
                    for movementStr in movementStrList:
                        if filename.startswith(movementStr.lower()):
                            movementResults.append(filePath)
        else:
            movementResults = results
        return sorted(set(movementResults))

    def search(self, query, field=None, fileExtensions=None):
        r'''
        Search this corpus for metadata entries, returning a metadataBundle

        >>> corpus.corpora.CoreCorpus().search('3/4')
        <music21.metadata.bundles.MetadataBundle {1870 entries}>

        >>> corpus.corpora.CoreCorpus().search(
        ...      'bach',
        ...      field='composer',
        ...      )
        <music21.metadata.bundles.MetadataBundle {22 entries}>

        >>> predicate = lambda noteCount: noteCount < 20
        >>> corpus.corpora.CoreCorpus().search(
        ...     predicate,
        ...     field='noteCount',
        ...     )
        <music21.metadata.bundles.MetadataBundle {134 entries}>
        '''
        return self.metadataBundle.search(
            query,
            field=field,
            fileExtensions=fileExtensions,
        )

    ### PUBLIC PROPERTIES ###

    @property
    def directoryInformation(self):
        '''
        Returns a tuple of DirectoryInformation objects for a
        each directory in self._directoryInformation.

        >>> core = corpus.corpora.CoreCorpus()
        >>> diBrief = core.directoryInformation[0:4]
        >>> diBrief
        (<music21.corpus.work.DirectoryInformation airdsAirs>,
         <music21.corpus.work.DirectoryInformation bach>,
         <music21.corpus.work.DirectoryInformation beethoven>,
         <music21.corpus.work.DirectoryInformation ciconia>)
        >>> diBrief[3].directoryTitle
        'Johannes Ciconia'
        '''
        dirInfo = []
        for infoTriple in self._directoryInformation:
            dirInfo.append(
                work.DirectoryInformation(*infoTriple, corpusObject=self))
        return tuple(dirInfo)

    @abc.abstractproperty
    def name(self):
        r'''
        The name of a given corpus.
        '''
        raise NotImplementedError

    @property
    def metadataBundle(self):
        r'''
        The metadata bundle for a corpus:

        >>> from music21 import corpus
        >>> corpus.corpora.CoreCorpus().metadataBundle
        <music21.metadata.bundles.MetadataBundle 'core': {144... entries}>

        As a technical aside, the metadata bundle for a corpus is actually
        stored in corpus.manager, in order to cache most effectively over
        multiple calls. There might be good reasons to eventually move them
        to each Corpus object, so long as its cached across instances of the
        class.
        '''
        # imported here rather than at module level
        from music21.corpus import manager
        return manager.getMetadataBundleByCorpus(self)

    def getComposer(
        self,
        composerName,
        fileExtensions=None,
    ):
        '''
        Return all filenames in the corpus that match a composer's or a
        collection's name. An `fileExtensions`, if provided, defines which
        extensions are returned. An `fileExtensions` of None (default) returns
        all extensions.

        Note that xml and mxl are treated equivalently.

        >>> from music21 import corpus
        >>> coreCorpus = corpus.corpora.CoreCorpus()
        >>> a = coreCorpus.getComposer('bach')
        >>> len(a) > 100
        True

        >>> a = coreCorpus.getComposer('bach', 'krn')
        >>> len(a) < 10
        True

        >>> a = coreCorpus.getComposer('bach', 'xml')
        >>> len(a) > 10
        True
        '''
        paths = self.getPaths(fileExtensions)
        results = []
        for path in paths:
            # iterate through path components; cannot match entire string
            # composer name may be at any level
            stubs = path.split(os.sep)
            for stub in stubs:
                # need to remove extension if found
                if composerName.lower() == stub.lower():
                    results.append(path)
                    break
                # get all but the last dot group
                # this is done for file names that function like composer names
                elif '.' in stub:
                    newStub = '.'.join(stub.split('.')[:-1]).lower()
                    if newStub == composerName.lower():
                        results.append(path)
                        break
        results.sort()
        return results

    def getWorkReferences(self):
        '''
        Return a data dictionary for all works in this corpus
        Returns a list of corpus.work.DirectoryInformation objects, one
        for each directory. A 'works' dictionary for each composer
        provides references to dictionaries for all associated works.

        This is used in the generation of corpus documentation

        >>> workRefs = corpus.corpora.CoreCorpus().getWorkReferences()
        >>> workRefs[1:3]
        [<music21.corpus.work.DirectoryInformation bach>,
         <music21.corpus.work.DirectoryInformation beethoven>]
        '''
        return list(self.directoryInformation)
schumann, opus41no1, luca, bach, bwv1080, ] # a list of metadataCache's can reside in this module-level storage; this # data is loaded on demand. _METADATA_BUNDLES = {'core':None, 'virtual':None, 'local':None} _ALL_EXTENSIONS = (common.findInputExtension('abc') + common.findInputExtension('lily') + common.findInputExtension('musicxml') + common.findInputExtension('musedata') + common.findInputExtension('humdrum')) # store all composers in the corpus (not virtual) # as two element tuples of path name, full name COMPOSERS = [ ('beethoven', 'Ludwig van Beethoven'), ('ciconia', 'Johannes Ciconia'), ('haydn', 'Joseph Haydn'), ('handel', 'George Frideric Handel'), ('mozart', 'Wolfgang Amadeus Mozart'), ('schoenberg', 'Arnold Schoenberg'), ('schumann', 'Robert Schumann'),